sp_x86_64_asm.S 1.1 MB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
34222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212
38222382323824238252382623827238282382923830238312383223833238342383523836238372383823839238402384123842238432384423845238462384723848238492385023851238522385323854238552385623857238582385923860238612386223863238642386523866238672386823869238702387123872238732387423875238762387723878238792388023881238822388323884238852388623887238882388923890238912389223893238942389523896238972389823899239002390123902239032390423905239062390723908239092391023911239122391323914239152391623917239182391923920239212392223923239242392523926239272392823929239302393123932239332393423935239362393723938239392394023941239422394323944239452394623947239482394923950239512395223953239542395523956239572395823959239602396123962239632396423965239662396723968239692397023971239722397323974239752397623977239782397923980239812398223983239842398523986239872398823989239902399123992239932399423995239962399723998239992400024001240022400324004240052400624007240082400924010240112401224013240142401524016240172401824019240202402124022240232402424025240262402724028240292403024031240322403324034240352403624037240382403924040240412404224043240442404524046240472404824049240502405124052240532405424055240562405724058240592406024061240622406324064240652406624067240682406924070240712407224073240742407524076240772407824079240802408124082240832408424085240862408724088240892409024091240922409324094240952409624097240982409924100241012410224103241042410524106241072410824109241102411124112241132411424115241162411724118241192412024121241222412324124241252412624127241282412924130241312413224133241342413524136241372413824139241402414124142241432414424145241462414724148241492415024151241522415324154241552415624157241582415924160241612416224163241642416524166241672416824169241702417124172241732417424175241762417724178241792418024181241822418324184241852418624187241882418924190241912419224193241942419524196241972419824199242002420124202242032420424205242062420724208242092421024211242122421324214242152421624217242182421924220242212
42222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212
46222462324624246252462624627246282462924630246312463224633246342463524636246372463824639246402464124642246432464424645246462464724648246492465024651246522465324654246552465624657246582465924660246612466224663246642466524666246672466824669246702467124672246732467424675246762467724678246792468024681246822468324684246852468624687246882468924690246912469224693246942469524696246972469824699247002470124702247032470424705247062470724708247092471024711247122471324714247152471624717247182471924720247212472224723247242472524726247272472824729247302473124732247332473424735247362473724738247392474024741247422474324744247452474624747247482474924750247512475224753247542475524756247572475824759247602476124762247632476424765247662476724768247692477024771247722477324774247752477624777247782477924780247812478224783247842478524786247872478824789247902479124792247932479424795247962479724798247992480024801248022480324804248052480624807248082480924810248112481224813248142481524816248172481824819248202482124822248232482424825248262482724828248292483024831248322483324834248352483624837248382483924840248412484224843248442484524846248472484824849248502485124852248532485424855248562485724858248592486024861248622486324864248652486624867248682486924870248712487224873248742487524876248772487824879248802488124882248832488424885248862488724888248892489024891248922489324894248952489624897248982489924900249012490224903249042490524906249072490824909249102491124912249132491424915249162491724918249192492024921249222492324924249252492624927249282492924930249312493224933249342493524936249372493824939249402494124942249432494424945249462494724948249492495024951249522495324954249552495624957249582495924960249612496224963249642496524966249672496824969249702497124972249732497424975249762497724978249792498024981249822498324984249852498624987249882498924990249912499224993249942499524996249972499824999250002500125002250032500425005250062500725008250092501025011250122501325014250152501625017250182501925020250212
50222502325024250252502625027250282502925030250312503225033250342503525036250372503825039250402504125042250432504425045250462504725048250492505025051250522505325054250552505625057250582505925060250612506225063250642506525066250672506825069250702507125072250732507425075250762507725078250792508025081250822508325084250852508625087250882508925090250912509225093250942509525096250972509825099251002510125102251032510425105251062510725108251092511025111251122511325114251152511625117251182511925120251212512225123251242512525126251272512825129251302513125132251332513425135251362513725138251392514025141251422514325144251452514625147251482514925150251512515225153251542515525156251572515825159251602516125162251632516425165251662516725168251692517025171251722517325174251752517625177251782517925180251812518225183251842518525186251872518825189251902519125192251932519425195251962519725198251992520025201252022520325204252052520625207252082520925210252112521225213252142521525216252172521825219252202522125222252232522425225252262522725228252292523025231252322523325234252352523625237252382523925240252412524225243252442524525246252472524825249252502525125252252532525425255252562525725258252592526025261252622526325264252652526625267252682526925270252712527225273252742527525276252772527825279252802528125282252832528425285252862528725288252892529025291252922529325294252952529625297252982529925300253012530225303253042530525306253072530825309253102531125312253132531425315253162531725318253192532025321253222532325324253252532625327253282532925330253312533225333253342533525336253372533825339253402534125342253432534425345253462534725348253492535025351253522535325354253552535625357253582535925360253612536225363253642536525366253672536825369253702537125372253732537425375253762537725378253792538025381253822538325384253852538625387253882538925390253912539225393253942539525396253972539825399254002540125402254032540425405254062540725408254092541025411254122541325414254152541625417254182541925420254212
54222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212
58222582325824258252582625827258282582925830258312583225833258342583525836258372583825839258402584125842258432584425845258462584725848258492585025851258522585325854258552585625857258582585925860258612586225863258642586525866258672586825869258702587125872258732587425875258762587725878258792588025881258822588325884258852588625887258882588925890258912589225893258942589525896258972589825899259002590125902259032590425905259062590725908259092591025911259122591325914259152591625917259182591925920259212592225923259242592525926259272592825929259302593125932259332593425935259362593725938259392594025941259422594325944259452594625947259482594925950259512595225953259542595525956259572595825959259602596125962259632596425965259662596725968259692597025971259722597325974259752597625977259782597925980259812598225983259842598525986259872598825989259902599125992259932599425995259962599725998259992600026001260022600326004260052600626007260082600926010260112601226013260142601526016260172601826019260202602126022260232602426025260262602726028260292603026031260322603326034260352603626037260382603926040260412604226043260442604526046260472604826049260502605126052260532605426055260562605726058260592606026061260622606326064260652606626067260682606926070260712607226073260742607526076260772607826079260802608126082260832608426085260862608726088260892609026091260922609326094260952609626097260982609926100261012610226103261042610526106261072610826109261102611126112261132611426115261162611726118261192612026121261222612326124261252612626127261282612926130261312613226133261342613526136261372613826139261402614126142261432614426145261462614726148261492615026151261522615326154261552615626157261582615926160261612616226163261642616526166261672616826169261702617126172261732617426175261762617726178261792618026181261822618326184261852618626187261882618926190261912619226193261942619526196261972619826199262002620126202262032620426205262062620726208262092621026211262122621326214262152621626217262182621926220262212
62222622326224262252622626227262282622926230262312623226233262342623526236262372623826239262402624126242262432624426245262462624726248262492625026251262522625326254262552625626257262582625926260262612626226263262642626526266262672626826269262702627126272262732627426275262762627726278262792628026281262822628326284262852628626287262882628926290262912629226293262942629526296262972629826299263002630126302263032630426305263062630726308263092631026311263122631326314263152631626317263182631926320263212632226323263242632526326263272632826329263302633126332263332633426335263362633726338263392634026341263422634326344263452634626347263482634926350263512635226353263542635526356263572635826359263602636126362263632636426365263662636726368263692637026371263722637326374263752637626377263782637926380263812638226383263842638526386263872638826389263902639126392263932639426395263962639726398263992640026401264022640326404264052640626407264082640926410264112641226413264142641526416264172641826419264202642126422264232642426425264262642726428264292643026431264322643326434264352643626437264382643926440264412644226443264442644526446264472644826449264502645126452264532645426455264562645726458264592646026461264622646326464264652646626467264682646926470264712647226473264742647526476264772647826479264802648126482264832648426485264862648726488264892649026491264922649326494264952649626497264982649926500265012650226503265042650526506265072650826509265102651126512265132651426515265162651726518265192652026521265222652326524265252652626527265282652926530265312653226533265342653526536265372653826539265402654126542265432654426545265462654726548265492655026551265522655326554265552655626557265582655926560265612656226563265642656526566265672656826569265702657126572265732657426575265762657726578265792658026581265822658326584265852658626587265882658926590265912659226593265942659526596265972659826599266002660126602266032660426605266062660726608266092661026611266122661326614266152661626617266182661926620266212
66222662326624266252662626627266282662926630266312663226633266342663526636266372663826639266402664126642266432664426645266462664726648266492665026651266522665326654266552665626657266582665926660266612666226663266642666526666266672666826669266702667126672266732667426675266762667726678266792668026681266822668326684266852668626687266882668926690266912669226693266942669526696266972669826699267002670126702267032670426705267062670726708267092671026711267122671326714267152671626717267182671926720267212672226723267242672526726267272672826729267302673126732267332673426735267362673726738267392674026741267422674326744267452674626747267482674926750267512675226753267542675526756267572675826759267602676126762267632676426765267662676726768267692677026771267722677326774267752677626777267782677926780267812678226783267842678526786267872678826789267902679126792267932679426795267962679726798267992680026801268022680326804268052680626807268082680926810268112681226813268142681526816268172681826819268202682126822268232682426825268262682726828268292683026831268322683326834268352683626837268382683926840268412684226843268442684526846268472684826849268502685126852268532685426855268562685726858268592686026861268622686326864268652686626867268682686926870268712687226873268742687526876268772687826879268802688126882268832688426885268862688726888268892689026891268922689326894268952689626897268982689926900269012690226903269042690526906269072690826909269102691126912269132691426915269162691726918269192692026921269222692326924269252692626927269282692926930269312693226933269342693526936269372693826939269402694126942269432694426945269462694726948269492695026951269522695326954269552695626957269582695926960269612696226963269642696526966269672696826969269702697126972269732697426975269762697726978269792698026981269822698326984269852698626987269882698926990269912699226993269942699526996269972699826999270002700127002270032700427005270062700727008270092701027011270122701327014270152701627017270182701927020270212
70222702327024270252702627027270282702927030270312703227033270342703527036270372703827039270402704127042270432704427045270462704727048270492705027051270522705327054270552705627057270582705927060270612706227063270642706527066270672706827069270702707127072270732707427075270762707727078270792708027081270822708327084270852708627087270882708927090270912709227093270942709527096270972709827099271002710127102271032710427105271062710727108271092711027111271122711327114271152711627117271182711927120271212712227123271242712527126271272712827129271302713127132271332713427135271362713727138271392714027141271422714327144271452714627147271482714927150271512715227153271542715527156271572715827159271602716127162271632716427165271662716727168271692717027171271722717327174271752717627177271782717927180271812718227183271842718527186271872718827189271902719127192271932719427195271962719727198271992720027201272022720327204272052720627207272082720927210272112721227213272142721527216272172721827219272202722127222272232722427225272262722727228272292723027231272322723327234272352723627237272382723927240272412724227243272442724527246272472724827249272502725127252272532725427255272562725727258272592726027261272622726327264272652726627267272682726927270272712727227273272742727527276272772727827279272802728127282272832728427285272862728727288272892729027291272922729327294272952729627297272982729927300273012730227303273042730527306273072730827309273102731127312273132731427315273162731727318273192732027321273222732327324273252732627327273282732927330273312733227333273342733527336273372733827339273402734127342273432734427345273462734727348273492735027351273522735327354273552735627357273582735927360273612736227363273642736527366273672736827369273702737127372273732737427375273762737727378273792738027381273822738327384273852738627387273882738927390273912739227393273942739527396273972739827399274002740127402274032740427405274062740727408274092741027411274122741327414274152741627417274182741927420274212
74222742327424274252742627427274282742927430274312743227433274342743527436274372743827439274402744127442274432744427445274462744727448274492745027451274522745327454274552745627457274582745927460274612746227463274642746527466274672746827469274702747127472274732747427475274762747727478274792748027481274822748327484274852748627487274882748927490274912749227493274942749527496274972749827499275002750127502275032750427505275062750727508275092751027511275122751327514275152751627517275182751927520275212752227523275242752527526275272752827529275302753127532275332753427535275362753727538275392754027541275422754327544275452754627547275482754927550275512755227553275542755527556275572755827559275602756127562275632756427565275662756727568275692757027571275722757327574275752757627577275782757927580275812758227583275842758527586275872758827589275902759127592275932759427595275962759727598275992760027601276022760327604276052760627607276082760927610276112761227613276142761527616276172761827619276202762127622276232762427625276262762727628276292763027631276322763327634276352763627637276382763927640276412764227643276442764527646276472764827649276502765127652276532765427655276562765727658276592766027661276622766327664276652766627667276682766927670276712767227673276742767527676276772767827679276802768127682276832768427685276862768727688276892769027691276922769327694276952769627697276982769927700277012770227703277042770527706277072770827709277102771127712277132771427715277162771727718277192772027721277222772327724277252772627727277282772927730277312773227733277342773527736277372773827739277402774127742277432774427745277462774727748277492775027751277522775327754277552775627757277582775927760277612776227763277642776527766277672776827769277702777127772277732777427775277762777727778277792778027781277822778327784277852778627787277882778927790277912779227793277942779527796277972779827799278002780127802278032780427805278062780727808278092781027811278122781327814278152781627817278182781927820278212
78222782327824278252782627827278282782927830278312783227833278342783527836278372783827839278402784127842278432784427845278462784727848278492785027851278522785327854278552785627857278582785927860278612786227863278642786527866278672786827869278702787127872278732787427875278762787727878278792788027881278822788327884278852788627887278882788927890278912789227893278942789527896278972789827899279002790127902279032790427905279062790727908279092791027911279122791327914279152791627917279182791927920279212792227923279242792527926279272792827929279302793127932279332793427935279362793727938279392794027941279422794327944279452794627947279482794927950279512795227953279542795527956279572795827959279602796127962279632796427965279662796727968279692797027971279722797327974279752797627977279782797927980279812798227983279842798527986279872798827989279902799127992279932799427995279962799727998279992800028001280022800328004280052800628007280082800928010280112801228013280142801528016280172801828019280202802128022280232802428025280262802728028280292803028031280322803328034280352803628037280382803928040280412804228043280442804528046280472804828049280502805128052280532805428055280562805728058280592806028061280622806328064280652806628067280682806928070280712807228073280742807528076280772807828079280802808128082280832808428085280862808728088280892809028091280922809328094280952809628097280982809928100281012810228103281042810528106281072810828109281102811128112281132811428115281162811728118281192812028121281222812328124281252812628127281282812928130281312813228133281342813528136281372813828139281402814128142281432814428145281462814728148281492815028151281522815328154281552815628157281582815928160281612816228163281642816528166281672816828169281702817128172281732817428175281762817728178281792818028181281822818328184281852818628187281882818928190281912819228193281942819528196281972819828199282002820128202282032820428205282062820728208282092821028211282122821328214282152821628217282182821928220282212
82222822328224282252822628227282282822928230282312823228233282342823528236282372823828239282402824128242282432824428245282462824728248282492825028251282522825328254282552825628257282582825928260282612826228263282642826528266282672826828269282702827128272282732827428275282762827728278282792828028281282822828328284282852828628287282882828928290282912829228293282942829528296282972829828299283002830128302283032830428305283062830728308283092831028311283122831328314283152831628317283182831928320283212832228323283242832528326283272832828329283302833128332283332833428335283362833728338283392834028341283422834328344283452834628347283482834928350283512835228353283542835528356283572835828359283602836128362283632836428365283662836728368283692837028371283722837328374283752837628377283782837928380283812838228383283842838528386283872838828389283902839128392283932839428395283962839728398283992840028401284022840328404284052840628407284082840928410284112841228413284142841528416284172841828419284202842128422284232842428425284262842728428284292843028431284322843328434284352843628437284382843928440284412844228443284442844528446284472844828449284502845128452284532845428455284562845728458284592846028461284622846328464284652846628467284682846928470284712847228473284742847528476284772847828479284802848128482284832848428485284862848728488284892849028491284922849328494284952849628497284982849928500285012850228503285042850528506285072850828509285102851128512285132851428515285162851728518285192852028521285222852328524285252852628527285282852928530285312853228533285342853528536285372853828539285402854128542285432854428545285462854728548285492855028551285522855328554285552855628557285582855928560285612856228563285642856528566285672856828569285702857128572285732857428575285762857728578285792858028581285822858328584285852858628587285882858928590285912859228593285942859528596285972859828599286002860128602286032860428605286062860728608286092861028611286122861328614286152861628617286182861928620286212
86222862328624286252862628627286282862928630286312863228633286342863528636286372863828639286402864128642286432864428645286462864728648286492865028651286522865328654286552865628657286582865928660286612866228663286642866528666286672866828669286702867128672286732867428675286762867728678286792868028681286822868328684286852868628687286882868928690286912869228693286942869528696286972869828699287002870128702287032870428705287062870728708287092871028711287122871328714287152871628717287182871928720287212872228723287242872528726287272872828729287302873128732287332873428735287362873728738287392874028741287422874328744287452874628747287482874928750287512875228753287542875528756287572875828759287602876128762287632876428765287662876728768287692877028771287722877328774287752877628777287782877928780287812878228783287842878528786287872878828789287902879128792287932879428795287962879728798287992880028801288022880328804288052880628807288082880928810288112881228813288142881528816288172881828819288202882128822288232882428825288262882728828288292883028831288322883328834288352883628837288382883928840288412884228843288442884528846288472884828849288502885128852288532885428855288562885728858288592886028861288622886328864288652886628867288682886928870288712887228873288742887528876288772887828879288802888128882288832888428885288862888728888288892889028891288922889328894288952889628897288982889928900289012890228903289042890528906289072890828909289102891128912289132891428915289162891728918289192892028921289222892328924289252892628927289282892928930289312893228933289342893528936289372893828939289402894128942289432894428945289462894728948289492895028951289522895328954289552895628957289582895928960289612896228963289642896528966289672896828969289702897128972289732897428975289762897728978289792898028981289822898328984289852898628987289882898928990289912899228993289942899528996289972899828999290002900129002290032900429005290062900729008290092901029011290122901329014290152901629017290182901929020290212
90222902329024290252902629027290282902929030290312903229033290342903529036290372903829039290402904129042290432904429045290462904729048290492905029051290522905329054290552905629057290582905929060290612906229063290642906529066290672906829069290702907129072290732907429075290762907729078290792908029081290822908329084290852908629087290882908929090290912909229093290942909529096290972909829099291002910129102291032910429105291062910729108291092911029111291122911329114291152911629117291182911929120291212912229123291242912529126291272912829129291302913129132291332913429135291362913729138291392914029141291422914329144291452914629147291482914929150291512915229153291542915529156291572915829159291602916129162291632916429165291662916729168291692917029171291722917329174291752917629177291782917929180291812918229183291842918529186291872918829189291902919129192291932919429195291962919729198291992920029201292022920329204292052920629207292082920929210292112921229213292142921529216292172921829219292202922129222292232922429225292262922729228292292923029231292322923329234292352923629237292382923929240292412924229243292442924529246292472924829249292502925129252292532925429255292562925729258292592926029261292622926329264292652926629267292682926929270292712927229273292742927529276292772927829279292802928129282292832928429285292862928729288292892929029291292922929329294292952929629297292982929929300293012930229303293042930529306293072930829309293102931129312293132931429315293162931729318293192932029321293222932329324293252932629327293282932929330293312933229333293342933529336293372933829339293402934129342293432934429345293462934729348293492935029351293522935329354293552935629357293582935929360293612936229363293642936529366293672936829369293702937129372293732937429375293762937729378293792938029381293822938329384293852938629387293882938929390293912939229393293942939529396293972939829399294002940129402294032940429405294062940729408294092941029411294122941329414294152941629417294182941929420294212
94222942329424294252942629427294282942929430294312943229433294342943529436294372943829439294402944129442294432944429445294462944729448294492945029451294522945329454294552945629457294582945929460294612946229463294642946529466294672946829469294702947129472294732947429475294762947729478294792948029481294822948329484294852948629487294882948929490294912949229493294942949529496294972949829499295002950129502295032950429505295062950729508295092951029511295122951329514295152951629517295182951929520295212952229523295242952529526295272952829529295302953129532295332953429535295362953729538295392954029541295422954329544295452954629547295482954929550295512955229553295542955529556295572955829559295602956129562295632956429565295662956729568295692957029571295722957329574295752957629577295782957929580295812958229583295842958529586295872958829589295902959129592295932959429595295962959729598295992960029601296022960329604296052960629607296082960929610296112961229613296142961529616296172961829619296202962129622296232962429625296262962729628296292963029631296322963329634296352963629637296382963929640296412964229643296442964529646296472964829649296502965129652296532965429655296562965729658296592966029661296622966329664296652966629667296682966929670296712967229673296742967529676296772967829679296802968129682296832968429685296862968729688296892969029691296922969329694296952969629697296982969929700297012970229703297042970529706297072970829709297102971129712297132971429715297162971729718297192972029721297222972329724297252972629727297282972929730297312973229733297342973529736297372973829739297402974129742297432974429745297462974729748297492975029751297522975329754297552975629757297582975929760297612976229763297642976529766297672976829769297702977129772297732977429775297762977729778297792978029781297822978329784297852978629787297882978929790297912979229793297942979529796297972979829799298002980129802298032980429805298062980729808298092981029811298122981329814298152981629817298182981929820298212
98222982329824298252982629827298282982929830298312983229833298342983529836298372983829839298402984129842298432984429845298462984729848298492985029851298522985329854298552985629857298582985929860298612986229863298642986529866298672986829869298702987129872298732987429875298762987729878298792988029881298822988329884298852988629887298882988929890298912989229893298942989529896298972989829899299002990129902299032990429905299062990729908299092991029911299122991329914299152991629917299182991929920299212992229923299242992529926299272992829929299302993129932299332993429935299362993729938299392994029941299422994329944299452994629947299482994929950299512995229953299542995529956299572995829959299602996129962299632996429965299662996729968299692997029971299722997329974299752997629977299782997929980299812998229983299842998529986299872998829989299902999129992299932999429995299962999729998299993000030001300023000330004300053000630007300083000930010300113001230013300143001530016300173001830019300203002130022300233002430025300263002730028300293003030031300323003330034300353003630037300383003930040300413004230043300443004530046300473004830049300503005130052300533005430055300563005730058300593006030061300623006330064300653006630067300683006930070300713007230073300743007530076300773007830079300803008130082300833008430085300863008730088300893009030091300923009330094300953009630097300983009930100301013010230103301043010530106301073010830109301103011130112301133011430115301163011730118301193012030121301223012330124301253012630127301283012930130301313013230133301343013530136301373013830139301403014130142301433014430145301463014730148301493015030151301523015330154301553015630157301583015930160301613016230163301643016530166301673016830169301703017130172301733017430175301763017730178301793018030181301823018330184301853018630187301883018930190301913019230193301943019530196301973019830199302003020130202302033020430205302063020730208302093021030211302123021330214302153021630217302183021930220302213
02223022330224302253022630227302283022930230302313023230233302343023530236302373023830239302403024130242302433024430245302463024730248302493025030251302523025330254302553025630257302583025930260302613026230263302643026530266302673026830269302703027130272302733027430275302763027730278302793028030281302823028330284302853028630287302883028930290302913029230293302943029530296302973029830299303003030130302303033030430305303063030730308303093031030311303123031330314303153031630317303183031930320303213032230323303243032530326303273032830329303303033130332303333033430335303363033730338303393034030341303423034330344303453034630347303483034930350303513035230353303543035530356303573035830359303603036130362303633036430365303663036730368303693037030371303723037330374303753037630377303783037930380303813038230383303843038530386303873038830389303903039130392303933039430395303963039730398303993040030401304023040330404304053040630407304083040930410304113041230413304143041530416304173041830419304203042130422304233042430425304263042730428304293043030431304323043330434304353043630437304383043930440304413044230443304443044530446304473044830449304503045130452304533045430455304563045730458304593046030461304623046330464304653046630467304683046930470304713047230473304743047530476304773047830479304803048130482304833048430485304863048730488304893049030491304923049330494304953049630497304983049930500305013050230503305043050530506305073050830509305103051130512305133051430515305163051730518305193052030521305223052330524305253052630527305283052930530305313053230533305343053530536305373053830539305403054130542305433054430545305463054730548305493055030551305523055330554305553055630557305583055930560305613056230563305643056530566305673056830569305703057130572305733057430575305763057730578305793058030581305823058330584305853058630587305883058930590305913059230593305943059530596305973059830599306003060130602306033060430605306063060730608306093061030611306123061330614306153061630617306183061930620306213
06223062330624306253062630627306283062930630306313063230633306343063530636306373063830639306403064130642306433064430645306463064730648306493065030651306523065330654306553065630657306583065930660306613066230663306643066530666306673066830669306703067130672306733067430675306763067730678306793068030681306823068330684306853068630687306883068930690306913069230693306943069530696306973069830699307003070130702307033070430705307063070730708307093071030711307123071330714307153071630717307183071930720307213072230723307243072530726307273072830729307303073130732307333073430735307363073730738307393074030741307423074330744307453074630747307483074930750307513075230753307543075530756307573075830759307603076130762307633076430765307663076730768307693077030771307723077330774307753077630777307783077930780307813078230783307843078530786307873078830789307903079130792307933079430795307963079730798307993080030801308023080330804308053080630807308083080930810308113081230813308143081530816308173081830819308203082130822308233082430825308263082730828308293083030831308323083330834308353083630837308383083930840308413084230843308443084530846308473084830849308503085130852308533085430855308563085730858308593086030861308623086330864308653086630867308683086930870308713087230873308743087530876308773087830879308803088130882308833088430885308863088730888308893089030891308923089330894308953089630897308983089930900309013090230903309043090530906309073090830909309103091130912309133091430915309163091730918309193092030921309223092330924309253092630927309283092930930309313093230933309343093530936309373093830939309403094130942309433094430945309463094730948309493095030951309523095330954309553095630957309583095930960309613096230963309643096530966309673096830969309703097130972309733097430975309763097730978309793098030981309823098330984309853098630987309883098930990309913099230993309943099530996309973099830999310003100131002310033100431005310063100731008310093101031011310123101331014310153101631017310183101931020310213
10223102331024310253102631027310283102931030310313103231033310343103531036310373103831039310403104131042310433104431045310463104731048310493105031051310523105331054310553105631057310583105931060310613106231063310643106531066310673106831069310703107131072310733107431075310763107731078310793108031081310823108331084310853108631087310883108931090310913109231093310943109531096310973109831099311003110131102311033110431105311063110731108311093111031111311123111331114311153111631117311183111931120311213112231123311243112531126311273112831129311303113131132311333113431135311363113731138311393114031141311423114331144311453114631147311483114931150311513115231153311543115531156311573115831159311603116131162311633116431165311663116731168311693117031171311723117331174311753117631177311783117931180311813118231183311843118531186311873118831189311903119131192311933119431195311963119731198311993120031201312023120331204312053120631207312083120931210312113121231213312143121531216312173121831219312203122131222312233122431225312263122731228312293123031231312323123331234312353123631237312383123931240312413124231243312443124531246312473124831249312503125131252312533125431255312563125731258312593126031261312623126331264312653126631267312683126931270312713127231273312743127531276312773127831279312803128131282312833128431285312863128731288312893129031291312923129331294312953129631297312983129931300313013130231303313043130531306313073130831309313103131131312313133131431315313163131731318313193132031321313223132331324313253132631327313283132931330313313133231333313343133531336313373133831339313403134131342313433134431345313463134731348313493135031351313523135331354313553135631357313583135931360313613136231363313643136531366313673136831369313703137131372313733137431375313763137731378313793138031381313823138331384313853138631387313883138931390313913139231393313943139531396313973139831399314003140131402314033140431405314063140731408314093141031411314123141331414314153141631417314183141931420314213
14223142331424314253142631427314283142931430314313143231433314343143531436314373143831439314403144131442314433144431445314463144731448314493145031451314523145331454314553145631457314583145931460314613146231463314643146531466314673146831469314703147131472314733147431475314763147731478314793148031481314823148331484314853148631487314883148931490314913149231493314943149531496314973149831499315003150131502315033150431505315063150731508315093151031511315123151331514315153151631517315183151931520315213152231523315243152531526315273152831529315303153131532315333153431535315363153731538315393154031541315423154331544315453154631547315483154931550315513155231553315543155531556315573155831559315603156131562315633156431565315663156731568315693157031571315723157331574315753157631577315783157931580315813158231583315843158531586315873158831589315903159131592315933159431595315963159731598315993160031601316023160331604316053160631607316083160931610316113161231613316143161531616316173161831619316203162131622316233162431625316263162731628316293163031631316323163331634316353163631637316383163931640316413164231643316443164531646316473164831649316503165131652316533165431655316563165731658316593166031661316623166331664316653166631667316683166931670316713167231673316743167531676316773167831679316803168131682316833168431685316863168731688316893169031691316923169331694316953169631697316983169931700317013170231703317043170531706317073170831709317103171131712317133171431715317163171731718317193172031721317223172331724317253172631727317283172931730317313173231733317343173531736317373173831739317403174131742317433174431745317463174731748317493175031751317523175331754317553175631757317583175931760317613176231763317643176531766317673176831769317703177131772317733177431775317763177731778317793178031781317823178331784317853178631787317883178931790317913179231793317943179531796317973179831799318003180131802318033180431805318063180731808318093181031811318123181331814318153181631817318183181931820318213
18223182331824318253182631827318283182931830318313183231833318343183531836318373183831839318403184131842318433184431845318463184731848318493185031851318523185331854318553185631857318583185931860318613186231863318643186531866318673186831869318703187131872318733187431875318763187731878318793188031881318823188331884318853188631887318883188931890318913189231893318943189531896318973189831899319003190131902319033190431905319063190731908319093191031911319123191331914319153191631917319183191931920319213192231923319243192531926319273192831929319303193131932319333193431935319363193731938319393194031941319423194331944319453194631947319483194931950319513195231953319543195531956319573195831959319603196131962319633196431965319663196731968319693197031971319723197331974319753197631977319783197931980319813198231983319843198531986319873198831989319903199131992319933199431995319963199731998319993200032001320023200332004320053200632007320083200932010320113201232013320143201532016320173201832019320203202132022320233202432025320263202732028320293203032031320323203332034320353203632037320383203932040320413204232043320443204532046320473204832049320503205132052320533205432055320563205732058320593206032061320623206332064320653206632067320683206932070320713207232073320743207532076320773207832079320803208132082320833208432085320863208732088320893209032091320923209332094320953209632097320983209932100321013210232103321043210532106321073210832109321103211132112321133211432115321163211732118321193212032121321223212332124321253212632127321283212932130321313213232133321343213532136321373213832139321403214132142321433214432145321463214732148321493215032151321523215332154321553215632157321583215932160321613216232163321643216532166321673216832169321703217132172321733217432175321763217732178321793218032181321823218332184321853218632187321883218932190321913219232193321943219532196321973219832199322003220132202322033220432205322063220732208322093221032211322123221332214322153221632217322183221932220322213
22223222332224322253222632227322283222932230322313223232233322343223532236322373223832239322403224132242322433224432245322463224732248322493225032251322523225332254322553225632257322583225932260322613226232263322643226532266322673226832269322703227132272322733227432275322763227732278322793228032281322823228332284322853228632287322883228932290322913229232293322943229532296322973229832299323003230132302323033230432305323063230732308323093231032311323123231332314323153231632317323183231932320323213232232323323243232532326323273232832329323303233132332323333233432335323363233732338323393234032341323423234332344323453234632347323483234932350323513235232353323543235532356323573235832359323603236132362323633236432365323663236732368323693237032371323723237332374323753237632377323783237932380323813238232383323843238532386323873238832389323903239132392323933239432395323963239732398323993240032401324023240332404324053240632407324083240932410324113241232413324143241532416324173241832419324203242132422324233242432425324263242732428324293243032431324323243332434324353243632437324383243932440324413244232443324443244532446324473244832449324503245132452324533245432455324563245732458324593246032461324623246332464324653246632467324683246932470324713247232473324743247532476324773247832479324803248132482324833248432485324863248732488324893249032491324923249332494324953249632497324983249932500325013250232503325043250532506325073250832509325103251132512325133251432515325163251732518325193252032521325223252332524325253252632527325283252932530325313253232533325343253532536325373253832539325403254132542325433254432545325463254732548325493255032551325523255332554325553255632557325583255932560325613256232563325643256532566325673256832569325703257132572325733257432575325763257732578325793258032581325823258332584325853258632587325883258932590325913259232593325943259532596325973259832599326003260132602326033260432605326063260732608326093261032611326123261332614326153261632617326183261932620326213
26223262332624326253262632627326283262932630326313263232633326343263532636326373263832639326403264132642326433264432645326463264732648326493265032651326523265332654326553265632657326583265932660326613266232663326643266532666326673266832669326703267132672326733267432675326763267732678326793268032681326823268332684326853268632687326883268932690326913269232693326943269532696326973269832699327003270132702327033270432705327063270732708327093271032711327123271332714327153271632717327183271932720327213272232723327243272532726327273272832729327303273132732327333273432735327363273732738327393274032741327423274332744327453274632747327483274932750327513275232753327543275532756327573275832759327603276132762327633276432765327663276732768327693277032771327723277332774327753277632777327783277932780327813278232783327843278532786327873278832789327903279132792327933279432795327963279732798327993280032801328023280332804328053280632807328083280932810328113281232813328143281532816328173281832819328203282132822328233282432825328263282732828328293283032831328323283332834328353283632837328383283932840328413284232843328443284532846328473284832849328503285132852328533285432855328563285732858328593286032861328623286332864328653286632867328683286932870328713287232873328743287532876328773287832879328803288132882328833288432885328863288732888328893289032891328923289332894328953289632897328983289932900329013290232903329043290532906329073290832909329103291132912329133291432915329163291732918329193292032921329223292332924329253292632927329283292932930329313293232933329343293532936329373293832939329403294132942329433294432945329463294732948329493295032951329523295332954329553295632957329583295932960329613296232963329643296532966329673296832969329703297132972329733297432975329763297732978329793298032981329823298332984329853298632987329883298932990329913299232993329943299532996329973299832999330003300133002330033300433005330063300733008330093301033011330123301333014330153301633017330183301933020330213
30223302333024330253302633027330283302933030330313303233033330343303533036330373303833039330403304133042330433304433045330463304733048330493305033051330523305333054330553305633057330583305933060330613306233063330643306533066330673306833069330703307133072330733307433075330763307733078330793308033081330823308333084330853308633087330883308933090330913309233093330943309533096330973309833099331003310133102331033310433105331063310733108331093311033111331123311333114331153311633117331183311933120331213312233123331243312533126331273312833129331303313133132331333313433135331363313733138331393314033141331423314333144331453314633147331483314933150331513315233153331543315533156331573315833159331603316133162331633316433165331663316733168331693317033171331723317333174331753317633177331783317933180331813318233183331843318533186331873318833189331903319133192331933319433195331963319733198331993320033201332023320333204332053320633207332083320933210332113321233213332143321533216332173321833219332203322133222332233322433225332263322733228332293323033231332323323333234332353323633237332383323933240332413324233243332443324533246332473324833249332503325133252332533325433255332563325733258332593326033261332623326333264332653326633267332683326933270332713327233273332743327533276332773327833279332803328133282332833328433285332863328733288332893329033291332923329333294332953329633297332983329933300333013330233303333043330533306333073330833309333103331133312333133331433315333163331733318333193332033321333223332333324333253332633327333283332933330333313333233333333343333533336333373333833339333403334133342333433334433345333463334733348333493335033351333523335333354333553335633357333583335933360333613336233363333643336533366333673336833369333703337133372333733337433375333763337733378333793338033381333823338333384333853338633387333883338933390333913339233393333943339533396333973339833399334003340133402334033340433405334063340733408334093341033411334123341333414334153341633417334183341933420334213
34223342333424334253342633427334283342933430334313343233433334343343533436334373343833439334403344133442334433344433445334463344733448334493345033451334523345333454334553345633457334583345933460334613346233463334643346533466334673346833469334703347133472334733347433475334763347733478334793348033481334823348333484334853348633487334883348933490334913349233493334943349533496334973349833499335003350133502335033350433505335063350733508335093351033511335123351333514335153351633517335183351933520335213352233523335243352533526335273352833529335303353133532335333353433535335363353733538335393354033541335423354333544335453354633547335483354933550335513355233553335543355533556335573355833559335603356133562335633356433565335663356733568335693357033571335723357333574335753357633577335783357933580335813358233583335843358533586335873358833589335903359133592335933359433595335963359733598335993360033601336023360333604336053360633607336083360933610336113361233613336143361533616336173361833619336203362133622336233362433625336263362733628336293363033631336323363333634336353363633637336383363933640336413364233643336443364533646336473364833649336503365133652336533365433655336563365733658336593366033661336623366333664336653366633667336683366933670336713367233673336743367533676336773367833679336803368133682336833368433685336863368733688336893369033691336923369333694336953369633697336983369933700337013370233703337043370533706337073370833709337103371133712337133371433715337163371733718337193372033721337223372333724337253372633727337283372933730337313373233733337343373533736337373373833739337403374133742337433374433745337463374733748337493375033751337523375333754337553375633757337583375933760337613376233763337643376533766337673376833769337703377133772337733377433775337763377733778337793378033781337823378333784337853378633787337883378933790337913379233793337943379533796337973379833799338003380133802338033380433805338063380733808338093381033811338123381333814338153381633817338183381933820338213
38223382333824338253382633827338283382933830338313383233833338343383533836338373383833839338403384133842338433384433845338463384733848338493385033851338523385333854338553385633857338583385933860338613386233863338643386533866338673386833869338703387133872338733387433875338763387733878338793388033881338823388333884338853388633887338883388933890338913389233893338943389533896338973389833899339003390133902339033390433905339063390733908339093391033911339123391333914339153391633917339183391933920339213392233923339243392533926339273392833929339303393133932339333393433935339363393733938339393394033941339423394333944339453394633947339483394933950339513395233953339543395533956339573395833959339603396133962339633396433965339663396733968339693397033971339723397333974339753397633977339783397933980339813398233983339843398533986339873398833989339903399133992339933399433995339963399733998339993400034001340023400334004340053400634007340083400934010340113401234013340143401534016340173401834019340203402134022340233402434025340263402734028340293403034031340323403334034340353403634037340383403934040340413404234043340443404534046340473404834049340503405134052340533405434055340563405734058340593406034061340623406334064340653406634067340683406934070340713407234073340743407534076340773407834079340803408134082340833408434085340863408734088340893409034091340923409334094340953409634097340983409934100341013410234103341043410534106341073410834109341103411134112341133411434115341163411734118341193412034121341223412334124341253412634127341283412934130341313413234133341343413534136341373413834139341403414134142341433414434145341463414734148341493415034151341523415334154341553415634157341583415934160341613416234163341643416534166341673416834169341703417134172341733417434175341763417734178341793418034181341823418334184341853418634187341883418934190341913419234193341943419534196341973419834199342003420134202342033420434205342063420734208342093421034211342123421334214342153421634217342183421934220342213
42223422334224342253422634227342283422934230342313423234233342343423534236342373423834239342403424134242342433424434245342463424734248342493425034251342523425334254342553425634257342583425934260342613426234263342643426534266342673426834269342703427134272342733427434275342763427734278342793428034281342823428334284342853428634287342883428934290342913429234293342943429534296342973429834299343003430134302343033430434305343063430734308343093431034311343123431334314343153431634317343183431934320343213432234323343243432534326343273432834329343303433134332343333433434335343363433734338343393434034341343423434334344343453434634347343483434934350343513435234353343543435534356343573435834359343603436134362343633436434365343663436734368343693437034371343723437334374343753437634377343783437934380343813438234383343843438534386343873438834389343903439134392343933439434395343963439734398343993440034401344023440334404344053440634407344083440934410344113441234413344143441534416344173441834419344203442134422344233442434425344263442734428344293443034431344323443334434344353443634437344383443934440344413444234443344443444534446344473444834449344503445134452344533445434455344563445734458344593446034461344623446334464344653446634467344683446934470344713447234473344743447534476344773447834479344803448134482344833448434485344863448734488344893449034491344923449334494344953449634497344983449934500345013450234503345043450534506345073450834509345103451134512345133451434515345163451734518345193452034521345223452334524345253452634527345283452934530345313453234533345343453534536345373453834539345403454134542345433454434545345463454734548345493455034551345523455334554345553455634557345583455934560345613456234563345643456534566345673456834569345703457134572345733457434575345763457734578345793458034581345823458334584345853458634587345883458934590345913459234593345943459534596345973459834599346003460134602346033460434605346063460734608346093461034611346123461334614346153461634617346183461934620346213
46223462334624346253462634627346283462934630346313463234633346343463534636346373463834639346403464134642346433464434645346463464734648346493465034651346523465334654346553465634657346583465934660346613466234663346643466534666346673466834669346703467134672346733467434675346763467734678346793468034681346823468334684346853468634687346883468934690346913469234693346943469534696346973469834699347003470134702347033470434705347063470734708347093471034711347123471334714347153471634717347183471934720347213472234723347243472534726347273472834729347303473134732347333473434735347363473734738347393474034741347423474334744347453474634747347483474934750347513475234753347543475534756347573475834759347603476134762347633476434765347663476734768347693477034771347723477334774347753477634777347783477934780347813478234783347843478534786347873478834789347903479134792347933479434795347963479734798347993480034801348023480334804348053480634807348083480934810348113481234813348143481534816348173481834819348203482134822348233482434825348263482734828348293483034831348323483334834348353483634837348383483934840348413484234843348443484534846348473484834849348503485134852348533485434855348563485734858348593486034861348623486334864348653486634867348683486934870348713487234873348743487534876348773487834879348803488134882348833488434885348863488734888348893489034891348923489334894348953489634897348983489934900349013490234903349043490534906349073490834909349103491134912349133491434915349163491734918349193492034921349223492334924349253492634927349283492934930349313493234933349343493534936349373493834939349403494134942349433494434945349463494734948349493495034951349523495334954349553495634957349583495934960349613496234963349643496534966349673496834969349703497134972349733497434975349763497734978349793498034981349823498334984349853498634987349883498934990349913499234993349943499534996349973499834999350003500135002350033500435005350063500735008350093501035011350123501335014350153501635017350183501935020350213
50223502335024350253502635027350283502935030350313503235033350343503535036350373503835039350403504135042350433504435045350463504735048350493505035051350523505335054350553505635057350583505935060350613506235063350643506535066350673506835069350703507135072350733507435075350763507735078350793508035081350823508335084350853508635087350883508935090350913509235093350943509535096350973509835099351003510135102351033510435105351063510735108351093511035111351123511335114351153511635117351183511935120351213512235123351243512535126351273512835129351303513135132351333513435135351363513735138351393514035141351423514335144351453514635147351483514935150351513515235153351543515535156351573515835159351603516135162351633516435165351663516735168351693517035171351723517335174351753517635177351783517935180351813518235183351843518535186351873518835189351903519135192351933519435195351963519735198351993520035201352023520335204352053520635207352083520935210352113521235213352143521535216352173521835219352203522135222352233522435225352263522735228352293523035231352323523335234352353523635237352383523935240352413524235243352443524535246352473524835249352503525135252352533525435255352563525735258352593526035261352623526335264352653526635267352683526935270352713527235273352743527535276352773527835279352803528135282352833528435285352863528735288352893529035291352923529335294352953529635297352983529935300353013530235303353043530535306353073530835309353103531135312353133531435315353163531735318353193532035321353223532335324353253532635327353283532935330353313533235333353343533535336353373533835339353403534135342353433534435345353463534735348353493535035351353523535335354353553535635357353583535935360353613536235363353643536535366353673536835369353703537135372353733537435375353763537735378353793538035381353823538335384353853538635387353883538935390353913539235393353943539535396353973539835399354003540135402354033540435405354063540735408354093541035411354123541335414354153541635417354183541935420354213
54223542335424354253542635427354283542935430354313543235433354343543535436354373543835439354403544135442354433544435445354463544735448354493545035451354523545335454354553545635457354583545935460354613546235463354643546535466354673546835469354703547135472354733547435475354763547735478354793548035481354823548335484354853548635487354883548935490354913549235493354943549535496354973549835499355003550135502355033550435505355063550735508355093551035511355123551335514355153551635517355183551935520355213552235523355243552535526355273552835529355303553135532355333553435535355363553735538355393554035541355423554335544355453554635547355483554935550355513555235553355543555535556355573555835559355603556135562355633556435565355663556735568355693557035571355723557335574355753557635577355783557935580355813558235583355843558535586355873558835589355903559135592355933559435595355963559735598355993560035601356023560335604356053560635607356083560935610356113561235613356143561535616356173561835619356203562135622356233562435625356263562735628356293563035631356323563335634356353563635637356383563935640356413564235643356443564535646356473564835649356503565135652356533565435655356563565735658356593566035661356623566335664356653566635667356683566935670356713567235673356743567535676356773567835679356803568135682356833568435685356863568735688356893569035691356923569335694356953569635697356983569935700357013570235703357043570535706357073570835709357103571135712357133571435715357163571735718357193572035721357223572335724357253572635727357283572935730357313573235733357343573535736357373573835739357403574135742357433574435745357463574735748357493575035751357523575335754357553575635757357583575935760357613576235763357643576535766357673576835769357703577135772357733577435775357763577735778357793578035781357823578335784357853578635787357883578935790357913579235793357943579535796357973579835799358003580135802358033580435805358063580735808358093581035811358123581335814358153581635817358183581935820358213
58223582335824358253582635827358283582935830358313583235833358343583535836358373583835839358403584135842358433584435845358463584735848358493585035851358523585335854358553585635857358583585935860358613586235863358643586535866358673586835869358703587135872358733587435875358763587735878358793588035881358823588335884358853588635887358883588935890358913589235893358943589535896358973589835899359003590135902359033590435905359063590735908359093591035911359123591335914359153591635917359183591935920359213592235923359243592535926359273592835929359303593135932359333593435935359363593735938359393594035941359423594335944359453594635947359483594935950359513595235953359543595535956359573595835959359603596135962359633596435965359663596735968359693597035971359723597335974359753597635977359783597935980359813598235983359843598535986359873598835989359903599135992359933599435995359963599735998359993600036001360023600336004360053600636007360083600936010360113601236013360143601536016360173601836019360203602136022360233602436025360263602736028360293603036031360323603336034360353603636037360383603936040360413604236043360443604536046360473604836049360503605136052360533605436055360563605736058360593606036061360623606336064360653606636067360683606936070360713607236073360743607536076360773607836079360803608136082360833608436085360863608736088360893609036091360923609336094360953609636097360983609936100361013610236103361043610536106361073610836109361103611136112361133611436115361163611736118361193612036121361223612336124361253612636127361283612936130361313613236133361343613536136361373613836139361403614136142361433614436145361463614736148361493615036151361523615336154361553615636157361583615936160361613616236163361643616536166361673616836169361703617136172361733617436175361763617736178361793618036181361823618336184361853618636187361883618936190361913619236193361943619536196361973619836199362003620136202362033620436205362063620736208362093621036211362123621336214362153621636217362183621936220362213
62223622336224362253622636227362283622936230362313623236233362343623536236362373623836239362403624136242362433624436245362463624736248362493625036251362523625336254362553625636257362583625936260362613626236263362643626536266362673626836269362703627136272362733627436275362763627736278362793628036281362823628336284362853628636287362883628936290362913629236293362943629536296362973629836299363003630136302363033630436305363063630736308363093631036311363123631336314363153631636317363183631936320363213632236323363243632536326363273632836329363303633136332363333633436335363363633736338363393634036341363423634336344363453634636347363483634936350363513635236353363543635536356363573635836359363603636136362363633636436365363663636736368363693637036371363723637336374363753637636377363783637936380363813638236383363843638536386363873638836389363903639136392363933639436395363963639736398363993640036401364023640336404364053640636407364083640936410364113641236413364143641536416364173641836419364203642136422364233642436425364263642736428364293643036431364323643336434364353643636437364383643936440364413644236443364443644536446364473644836449364503645136452364533645436455364563645736458364593646036461364623646336464364653646636467364683646936470364713647236473364743647536476364773647836479364803648136482364833648436485364863648736488364893649036491364923649336494364953649636497364983649936500365013650236503365043650536506365073650836509365103651136512365133651436515365163651736518365193652036521365223652336524365253652636527365283652936530365313653236533365343653536536365373653836539365403654136542365433654436545365463654736548365493655036551365523655336554365553655636557365583655936560365613656236563365643656536566365673656836569365703657136572365733657436575365763657736578365793658036581365823658336584365853658636587365883658936590365913659236593365943659536596365973659836599366003660136602366033660436605366063660736608366093661036611366123661336614366153661636617366183661936620366213
66223662336624366253662636627366283662936630366313663236633366343663536636366373663836639366403664136642366433664436645366463664736648366493665036651366523665336654366553665636657366583665936660366613666236663366643666536666366673666836669366703667136672366733667436675366763667736678366793668036681366823668336684366853668636687366883668936690366913669236693366943669536696366973669836699367003670136702367033670436705367063670736708367093671036711367123671336714367153671636717367183671936720367213672236723367243672536726367273672836729367303673136732367333673436735367363673736738367393674036741367423674336744367453674636747367483674936750367513675236753367543675536756367573675836759367603676136762367633676436765367663676736768367693677036771367723677336774367753677636777367783677936780367813678236783367843678536786367873678836789367903679136792367933679436795367963679736798367993680036801368023680336804368053680636807368083680936810368113681236813368143681536816368173681836819368203682136822368233682436825368263682736828368293683036831368323683336834368353683636837368383683936840368413684236843368443684536846368473684836849368503685136852368533685436855368563685736858368593686036861368623686336864368653686636867368683686936870368713687236873368743687536876368773687836879368803688136882368833688436885368863688736888368893689036891368923689336894368953689636897368983689936900369013690236903369043690536906369073690836909369103691136912369133691436915369163691736918369193692036921369223692336924369253692636927369283692936930369313693236933369343693536936369373693836939369403694136942369433694436945369463694736948369493695036951369523695336954369553695636957369583695936960369613696236963369643696536966369673696836969369703697136972369733697436975369763697736978369793698036981369823698336984369853698636987369883698936990369913699236993369943699536996369973699836999370003700137002370033700437005370063700737008370093701037011370123701337014370153701637017370183701937020370213
70223702337024370253702637027370283702937030370313703237033370343703537036370373703837039370403704137042370433704437045370463704737048370493705037051370523705337054370553705637057370583705937060370613706237063370643706537066370673706837069370703707137072370733707437075370763707737078370793708037081370823708337084370853708637087370883708937090370913709237093370943709537096370973709837099371003710137102371033710437105371063710737108371093711037111371123711337114371153711637117371183711937120371213712237123371243712537126371273712837129371303713137132371333713437135371363713737138371393714037141371423714337144371453714637147371483714937150371513715237153371543715537156371573715837159371603716137162371633716437165371663716737168371693717037171371723717337174371753717637177371783717937180371813718237183371843718537186371873718837189371903719137192371933719437195371963719737198371993720037201372023720337204372053720637207372083720937210372113721237213372143721537216372173721837219372203722137222372233722437225372263722737228372293723037231372323723337234372353723637237372383723937240372413724237243372443724537246372473724837249372503725137252372533725437255372563725737258372593726037261372623726337264372653726637267372683726937270372713727237273372743727537276372773727837279372803728137282372833728437285372863728737288372893729037291372923729337294372953729637297372983729937300373013730237303373043730537306373073730837309373103731137312373133731437315373163731737318373193732037321373223732337324373253732637327373283732937330373313733237333373343733537336373373733837339373403734137342373433734437345373463734737348373493735037351373523735337354373553735637357373583735937360373613736237363373643736537366373673736837369373703737137372373733737437375373763737737378373793738037381373823738337384373853738637387373883738937390373913739237393373943739537396373973739837399374003740137402374033740437405374063740737408374093741037411374123741337414374153741637417374183741937420374213
74223742337424374253742637427374283742937430374313743237433374343743537436374373743837439374403744137442374433744437445374463744737448374493745037451374523745337454374553745637457374583745937460374613746237463374643746537466374673746837469374703747137472374733747437475374763747737478374793748037481374823748337484374853748637487374883748937490374913749237493374943749537496374973749837499375003750137502375033750437505375063750737508375093751037511375123751337514375153751637517375183751937520375213752237523375243752537526375273752837529375303753137532375333753437535375363753737538375393754037541375423754337544375453754637547375483754937550375513755237553375543755537556375573755837559375603756137562375633756437565375663756737568375693757037571375723757337574375753757637577375783757937580375813758237583375843758537586375873758837589375903759137592375933759437595375963759737598375993760037601376023760337604376053760637607376083760937610376113761237613376143761537616376173761837619376203762137622376233762437625376263762737628376293763037631376323763337634376353763637637376383763937640376413764237643376443764537646376473764837649376503765137652376533765437655376563765737658376593766037661376623766337664376653766637667376683766937670376713767237673376743767537676376773767837679376803768137682376833768437685376863768737688376893769037691376923769337694376953769637697376983769937700377013770237703377043770537706377073770837709377103771137712377133771437715377163771737718377193772037721377223772337724377253772637727377283772937730377313773237733377343773537736377373773837739377403774137742377433774437745377463774737748377493775037751377523775337754377553775637757377583775937760377613776237763377643776537766377673776837769377703777137772377733777437775377763777737778377793778037781377823778337784377853778637787377883778937790377913779237793377943779537796377973779837799378003780137802378033780437805378063780737808378093781037811378123781337814378153781637817378183781937820378213
78223782337824378253782637827378283782937830378313783237833378343783537836378373783837839378403784137842378433784437845378463784737848378493785037851378523785337854378553785637857378583785937860378613786237863378643786537866378673786837869378703787137872378733787437875378763787737878378793788037881378823788337884378853788637887378883788937890378913789237893378943789537896378973789837899379003790137902379033790437905379063790737908379093791037911379123791337914379153791637917379183791937920379213792237923379243792537926379273792837929379303793137932379333793437935379363793737938379393794037941379423794337944379453794637947379483794937950379513795237953379543795537956379573795837959379603796137962379633796437965379663796737968379693797037971379723797337974379753797637977379783797937980379813798237983379843798537986379873798837989379903799137992379933799437995379963799737998379993800038001380023800338004380053800638007380083800938010380113801238013380143801538016380173801838019380203802138022380233802438025380263802738028380293803038031380323803338034380353803638037380383803938040380413804238043380443804538046380473804838049380503805138052380533805438055380563805738058380593806038061380623806338064380653806638067380683806938070380713807238073380743807538076380773807838079380803808138082380833808438085380863808738088380893809038091380923809338094380953809638097380983809938100381013810238103381043810538106381073810838109381103811138112381133811438115381163811738118381193812038121381223812338124381253812638127381283812938130381313813238133381343813538136381373813838139381403814138142381433814438145381463814738148381493815038151381523815338154381553815638157381583815938160381613816238163381643816538166381673816838169381703817138172381733817438175381763817738178381793818038181381823818338184381853818638187381883818938190381913819238193381943819538196381973819838199382003820138202382033820438205382063820738208382093821038211382123821338214382153821638217382183821938220382213
82223822338224382253822638227382283822938230382313823238233382343823538236382373823838239382403824138242382433824438245382463824738248382493825038251382523825338254382553825638257382583825938260382613826238263382643826538266382673826838269382703827138272382733827438275382763827738278382793828038281382823828338284382853828638287382883828938290382913829238293382943829538296382973829838299383003830138302383033830438305383063830738308383093831038311383123831338314383153831638317383183831938320383213832238323383243832538326383273832838329383303833138332383333833438335383363833738338383393834038341383423834338344383453834638347383483834938350383513835238353383543835538356383573835838359383603836138362383633836438365383663836738368383693837038371383723837338374383753837638377383783837938380383813838238383383843838538386383873838838389383903839138392383933839438395383963839738398383993840038401384023840338404384053840638407384083840938410384113841238413384143841538416384173841838419384203842138422384233842438425384263842738428384293843038431384323843338434384353843638437384383843938440384413844238443384443844538446384473844838449384503845138452384533845438455384563845738458384593846038461384623846338464384653846638467384683846938470384713847238473384743847538476384773847838479384803848138482384833848438485384863848738488384893849038491384923849338494384953849638497384983849938500385013850238503385043850538506385073850838509385103851138512385133851438515385163851738518385193852038521385223852338524385253852638527385283852938530385313853238533385343853538536385373853838539385403854138542385433854438545385463854738548385493855038551385523855338554385553855638557385583855938560385613856238563385643856538566385673856838569385703857138572385733857438575385763857738578385793858038581385823858338584385853858638587385883858938590385913859238593385943859538596385973859838599386003860138602386033860438605386063860738608386093861038611386123861338614386153861638617386183861938620386213
86223862338624386253862638627386283862938630386313863238633386343863538636386373863838639386403864138642386433864438645386463864738648386493865038651386523865338654386553865638657386583865938660386613866238663386643866538666386673866838669386703867138672386733867438675386763867738678386793868038681386823868338684386853868638687386883868938690386913869238693386943869538696386973869838699387003870138702387033870438705387063870738708387093871038711387123871338714387153871638717387183871938720387213872238723387243872538726387273872838729387303873138732387333873438735387363873738738387393874038741387423874338744387453874638747387483874938750387513875238753387543875538756387573875838759387603876138762387633876438765387663876738768387693877038771387723877338774387753877638777387783877938780387813878238783387843878538786387873878838789387903879138792387933879438795387963879738798387993880038801388023880338804388053880638807388083880938810388113881238813388143881538816388173881838819388203882138822388233882438825388263882738828388293883038831388323883338834388353883638837388383883938840388413884238843388443884538846388473884838849388503885138852388533885438855388563885738858388593886038861388623886338864388653886638867388683886938870388713887238873388743887538876388773887838879388803888138882388833888438885388863888738888388893889038891388923889338894388953889638897388983889938900389013890238903389043890538906389073890838909389103891138912389133891438915389163891738918389193892038921389223892338924389253892638927389283892938930389313893238933389343893538936389373893838939389403894138942389433894438945389463894738948389493895038951389523895338954389553895638957389583895938960389613896238963389643896538966389673896838969389703897138972389733897438975389763897738978389793898038981389823898338984389853898638987389883898938990389913899238993389943899538996389973899838999390003900139002390033900439005390063900739008390093901039011390123901339014390153901639017390183901939020390213
90223902339024390253902639027390283902939030390313903239033390343903539036390373903839039390403904139042390433904439045390463904739048390493905039051390523905339054390553905639057390583905939060390613906239063390643906539066390673906839069390703907139072390733907439075390763907739078390793908039081390823908339084390853908639087390883908939090390913909239093390943909539096390973909839099391003910139102391033910439105391063910739108391093911039111391123911339114391153911639117391183911939120391213912239123391243912539126391273912839129391303913139132391333913439135391363913739138391393914039141391423914339144391453914639147391483914939150391513915239153391543915539156391573915839159391603916139162391633916439165391663916739168391693917039171391723917339174391753917639177391783917939180391813918239183391843918539186391873918839189391903919139192391933919439195391963919739198391993920039201392023920339204392053920639207392083920939210392113921239213392143921539216392173921839219392203922139222392233922439225392263922739228392293923039231392323923339234392353923639237392383923939240392413924239243392443924539246392473924839249392503925139252392533925439255392563925739258392593926039261392623926339264392653926639267392683926939270392713927239273392743927539276392773927839279392803928139282392833928439285392863928739288392893929039291392923929339294392953929639297392983929939300393013930239303393043930539306393073930839309393103931139312393133931439315393163931739318393193932039321393223932339324393253932639327393283932939330393313933239333393343933539336393373933839339393403934139342393433934439345393463934739348393493935039351393523935339354393553935639357393583935939360393613936239363393643936539366393673936839369393703937139372393733937439375393763937739378393793938039381393823938339384393853938639387393883938939390393913939239393393943939539396393973939839399394003940139402394033940439405394063940739408394093941039411394123941339414394153941639417394183941939420394213
94223942339424394253942639427394283942939430394313943239433394343943539436394373943839439394403944139442394433944439445394463944739448394493945039451394523945339454394553945639457394583945939460394613946239463394643946539466394673946839469394703947139472394733947439475394763947739478394793948039481394823948339484394853948639487394883948939490394913949239493394943949539496394973949839499395003950139502395033950439505395063950739508395093951039511395123951339514395153951639517395183951939520395213952239523395243952539526395273952839529395303953139532395333953439535395363953739538395393954039541395423954339544395453954639547395483954939550395513955239553395543955539556395573955839559395603956139562395633956439565395663956739568395693957039571395723957339574395753957639577395783957939580395813958239583395843958539586395873958839589395903959139592395933959439595395963959739598395993960039601396023960339604396053960639607396083960939610396113961239613396143961539616396173961839619396203962139622396233962439625396263962739628396293963039631396323963339634396353963639637396383963939640396413964239643396443964539646396473964839649396503965139652396533965439655396563965739658396593966039661396623966339664396653966639667396683966939670396713967239673396743967539676396773967839679396803968139682396833968439685396863968739688396893969039691396923969339694396953969639697396983969939700397013970239703397043970539706397073970839709397103971139712397133971439715397163971739718397193972039721397223972339724397253972639727397283972939730397313973239733397343973539736397373973839739397403974139742397433974439745397463974739748397493975039751397523975339754397553975639757397583975939760397613976239763397643976539766397673976839769397703977139772397733977439775397763977739778397793978039781397823978339784397853978639787397883978939790397913979239793397943979539796397973979839799398003980139802398033980439805398063980739808398093981039811398123981339814398153981639817398183981939820398213
98223982339824398253982639827398283982939830398313983239833398343983539836398373983839839398403984139842398433984439845398463984739848398493985039851398523985339854398553985639857398583985939860398613986239863398643986539866398673986839869398703987139872398733987439875398763987739878398793988039881398823988339884398853988639887398883988939890398913989239893398943989539896398973989839899399003990139902399033990439905399063990739908399093991039911399123991339914399153991639917399183991939920399213992239923399243992539926399273992839929399303993139932399333993439935399363993739938399393994039941399423994339944399453994639947399483994939950399513995239953399543995539956399573995839959399603996139962399633996439965399663996739968399693997039971399723997339974399753997639977399783997939980399813998239983399843998539986399873998839989399903999139992399933999439995399963999739998399994000040001400024000340004400054000640007400084000940010400114001240013400144001540016400174001840019400204002140022400234002440025400264002740028400294003040031400324003340034400354003640037400384003940040400414004240043400444004540046400474004840049400504005140052400534005440055400564005740058400594006040061400624006340064400654006640067400684006940070400714007240073400744007540076400774007840079400804008140082400834008440085400864008740088400894009040091400924009340094400954009640097400984009940100401014010240103401044010540106401074010840109401104011140112401134011440115401164011740118401194012040121401224012340124401254012640127401284012940130401314013240133401344013540136401374013840139401404014140142401434014440145401464014740148401494015040151401524015340154401554015640157401584015940160401614016240163401644016540166401674016840169401704017140172401734017440175401764017740178401794018040181401824018340184401854018640187401884018940190401914019240193401944019540196401974019840199402004020140202402034020440205402064020740208402094021040211402124021340214402154021640217402184021940220402214
02224022340224402254022640227402284022940230402314023240233402344023540236402374023840239402404024140242402434024440245402464024740248402494025040251402524025340254402554025640257402584025940260402614026240263402644026540266402674026840269402704027140272402734027440275402764027740278402794028040281402824028340284402854028640287402884028940290402914029240293402944029540296402974029840299403004030140302403034030440305403064030740308403094031040311403124031340314403154031640317403184031940320403214032240323403244032540326403274032840329403304033140332403334033440335403364033740338403394034040341403424034340344403454034640347403484034940350403514035240353403544035540356403574035840359403604036140362403634036440365403664036740368403694037040371403724037340374403754037640377403784037940380403814038240383403844038540386403874038840389403904039140392403934039440395403964039740398403994040040401404024040340404404054040640407404084040940410404114041240413404144041540416404174041840419404204042140422404234042440425404264042740428404294043040431404324043340434404354043640437404384043940440404414044240443404444044540446404474044840449404504045140452404534045440455404564045740458404594046040461404624046340464404654046640467404684046940470404714047240473404744047540476404774047840479404804048140482404834048440485404864048740488404894049040491404924049340494404954049640497404984049940500405014050240503405044050540506405074050840509405104051140512405134051440515405164051740518405194052040521405224052340524405254052640527405284052940530405314053240533405344053540536405374053840539405404054140542405434054440545405464054740548405494055040551405524055340554405554055640557405584055940560405614056240563405644056540566405674056840569405704057140572405734057440575405764057740578405794058040581405824058340584405854058640587405884058940590405914059240593405944059540596405974059840599406004060140602406034060440605406064060740608406094061040611406124061340614406154061640617406184061940620406214
06224062340624406254062640627406284062940630406314063240633406344063540636406374063840639406404064140642406434064440645406464064740648406494065040651406524065340654406554065640657406584065940660406614066240663406644066540666406674066840669406704067140672406734067440675406764067740678406794068040681406824068340684406854068640687406884068940690406914069240693406944069540696406974069840699407004070140702407034070440705407064070740708407094071040711407124071340714407154071640717407184071940720407214072240723407244072540726407274072840729407304073140732407334073440735407364073740738407394074040741407424074340744407454074640747407484074940750407514075240753407544075540756407574075840759407604076140762407634076440765407664076740768407694077040771407724077340774407754077640777407784077940780407814078240783407844078540786407874078840789407904079140792407934079440795407964079740798407994080040801408024080340804408054080640807408084080940810408114081240813408144081540816408174081840819408204082140822408234082440825408264082740828408294083040831408324083340834408354083640837408384083940840408414084240843408444084540846408474084840849408504085140852408534085440855408564085740858408594086040861408624086340864408654086640867408684086940870408714087240873408744087540876408774087840879408804088140882408834088440885408864088740888408894089040891408924089340894408954089640897408984089940900409014090240903409044090540906409074090840909409104091140912409134091440915409164091740918409194092040921409224092340924409254092640927409284092940930409314093240933409344093540936409374093840939409404094140942409434094440945409464094740948409494095040951409524095340954409554095640957409584095940960409614096240963409644096540966409674096840969409704097140972409734097440975409764097740978409794098040981409824098340984409854098640987409884098940990409914099240993409944099540996409974099840999410004100141002410034100441005410064100741008410094101041011410124101341014410154101641017410184101941020410214
10224102341024410254102641027410284102941030410314103241033410344103541036410374103841039410404104141042410434104441045410464104741048410494105041051410524105341054410554105641057410584105941060410614106241063410644106541066410674106841069410704107141072410734107441075410764107741078410794108041081410824108341084410854108641087410884108941090410914109241093410944109541096410974109841099411004110141102411034110441105411064110741108411094111041111411124111341114411154111641117411184111941120411214112241123411244112541126411274112841129411304113141132411334113441135411364113741138411394114041141411424114341144411454114641147411484114941150411514115241153411544115541156411574115841159411604116141162411634116441165411664116741168411694117041171411724117341174411754117641177411784117941180411814118241183411844118541186411874118841189411904119141192411934119441195411964119741198411994120041201412024120341204412054120641207412084120941210412114121241213412144121541216412174121841219412204122141222412234122441225412264122741228412294123041231412324123341234412354123641237412384123941240412414124241243412444124541246412474124841249412504125141252412534125441255412564125741258412594126041261412624126341264412654126641267412684126941270412714127241273412744127541276412774127841279412804128141282412834128441285412864128741288412894129041291412924129341294412954129641297412984129941300413014130241303413044130541306413074130841309413104131141312413134131441315413164131741318413194132041321413224132341324413254132641327413284132941330413314133241333413344133541336413374133841339413404134141342413434134441345413464134741348413494135041351413524135341354413554135641357413584135941360413614136241363413644136541366413674136841369413704137141372413734137441375413764137741378413794138041381413824138341384413854138641387413884138941390413914139241393413944139541396413974139841399414004140141402414034140441405414064140741408414094141041411414124141341414414154141641417414184141941420414214
14224142341424414254142641427414284142941430414314143241433414344143541436414374143841439414404144141442414434144441445414464144741448414494145041451414524145341454414554145641457414584145941460414614146241463414644146541466414674146841469414704147141472414734147441475414764147741478414794148041481414824148341484414854148641487414884148941490414914149241493414944149541496414974149841499415004150141502415034150441505415064150741508415094151041511415124151341514415154151641517415184151941520415214152241523415244152541526415274152841529415304153141532415334153441535415364153741538415394154041541415424154341544415454154641547415484154941550415514155241553415544155541556415574155841559415604156141562415634156441565415664156741568415694157041571415724157341574415754157641577415784157941580415814158241583415844158541586415874158841589415904159141592415934159441595415964159741598415994160041601416024160341604416054160641607416084160941610416114161241613416144161541616416174161841619416204162141622416234162441625416264162741628416294163041631416324163341634416354163641637416384163941640416414164241643416444164541646416474164841649416504165141652416534165441655416564165741658416594166041661416624166341664416654166641667416684166941670416714167241673416744167541676416774167841679416804168141682416834168441685416864168741688416894169041691416924169341694416954169641697416984169941700417014170241703417044170541706417074170841709417104171141712417134171441715417164171741718417194172041721417224172341724417254172641727417284172941730417314173241733417344173541736417374173841739417404174141742417434174441745417464174741748417494175041751417524175341754417554175641757417584175941760417614176241763417644176541766417674176841769417704177141772417734177441775417764177741778417794178041781417824178341784417854178641787417884178941790417914179241793417944179541796417974179841799418004180141802418034180441805418064180741808418094181041811418124181341814418154181641817418184181941820418214
18224182341824418254182641827418284182941830418314183241833418344183541836418374183841839418404184141842418434184441845418464184741848418494185041851418524185341854418554185641857418584185941860418614186241863418644186541866418674186841869418704187141872418734187441875418764187741878418794188041881418824188341884418854188641887418884188941890418914189241893418944189541896418974189841899419004190141902419034190441905419064190741908419094191041911419124191341914419154191641917419184191941920419214192241923419244192541926419274192841929419304193141932419334193441935419364193741938419394194041941419424194341944419454194641947419484194941950419514195241953419544195541956419574195841959419604196141962419634196441965419664196741968419694197041971419724197341974419754197641977419784197941980419814198241983419844198541986419874198841989419904199141992419934199441995419964199741998419994200042001420024200342004420054200642007420084200942010420114201242013420144201542016420174201842019420204202142022420234202442025420264202742028420294203042031420324203342034420354203642037420384203942040420414204242043420444204542046420474204842049420504205142052420534205442055420564205742058420594206042061420624206342064420654206642067420684206942070420714207242073420744207542076420774207842079420804208142082420834208442085420864208742088420894209042091420924209342094420954209642097420984209942100421014210242103421044210542106421074210842109421104211142112421134211442115421164211742118421194212042121421224212342124421254212642127421284212942130421314213242133421344213542136421374213842139421404214142142421434214442145421464214742148421494215042151421524215342154421554215642157421584215942160421614216242163421644216542166421674216842169421704217142172421734217442175421764217742178421794218042181421824218342184421854218642187421884218942190421914219242193421944219542196421974219842199422004220142202422034220442205422064220742208422094221042211422124221342214422154221642217422184221942220422214
22224222342224422254222642227422284222942230422314223242233422344223542236422374223842239422404224142242422434224442245422464224742248422494225042251422524225342254422554225642257422584225942260422614226242263422644226542266422674226842269422704227142272422734227442275422764227742278422794228042281422824228342284422854228642287422884228942290422914229242293422944229542296422974229842299423004230142302423034230442305423064230742308423094231042311423124231342314423154231642317423184231942320423214232242323423244232542326423274232842329423304233142332423334233442335423364233742338423394234042341423424234342344423454234642347423484234942350423514235242353423544235542356423574235842359423604236142362423634236442365423664236742368423694237042371423724237342374423754237642377423784237942380423814238242383423844238542386423874238842389423904239142392423934239442395423964239742398423994240042401424024240342404424054240642407424084240942410424114241242413424144241542416424174241842419424204242142422424234242442425424264242742428424294243042431424324243342434424354243642437424384243942440424414244242443424444244542446424474244842449424504245142452424534245442455424564245742458424594246042461424624246342464424654246642467424684246942470424714247242473424744247542476424774247842479424804248142482424834248442485424864248742488424894249042491424924249342494424954249642497424984249942500425014250242503425044250542506425074250842509425104251142512425134251442515425164251742518425194252042521425224252342524425254252642527425284252942530425314253242533425344253542536425374253842539425404254142542425434254442545425464254742548425494255042551425524255342554425554255642557425584255942560425614256242563425644256542566425674256842569425704257142572425734257442575425764257742578425794258042581425824258342584425854258642587425884258942590425914259242593425944259542596425974259842599426004260142602426034260442605426064260742608426094261042611426124261342614426154261642617426184261942620426214
26224262342624426254262642627426284262942630426314263242633426344263542636426374263842639426404264142642426434264442645426464264742648426494265042651426524265342654426554265642657426584265942660426614266242663426644266542666426674266842669426704267142672426734267442675426764267742678426794268042681426824268342684426854268642687426884268942690426914269242693426944269542696426974269842699427004270142702427034270442705427064270742708427094271042711427124271342714427154271642717427184271942720427214272242723427244272542726427274272842729427304273142732427334273442735427364273742738427394274042741427424274342744427454274642747427484274942750427514275242753427544275542756427574275842759427604276142762427634276442765427664276742768427694277042771427724277342774427754277642777427784277942780427814278242783427844278542786427874278842789427904279142792427934279442795427964279742798427994280042801428024280342804428054280642807428084280942810428114281242813428144281542816428174281842819428204282142822428234282442825428264282742828428294283042831428324283342834428354283642837428384283942840428414284242843428444284542846428474284842849428504285142852428534285442855428564285742858428594286042861428624286342864428654286642867428684286942870428714287242873428744287542876428774287842879428804288142882428834288442885428864288742888428894289042891428924289342894428954289642897428984289942900429014290242903429044290542906429074290842909429104291142912429134291442915429164291742918429194292042921429224292342924429254292642927429284292942930429314293242933429344293542936429374293842939429404294142942429434294442945429464294742948429494295042951429524295342954429554295642957429584295942960429614296242963429644296542966429674296842969429704297142972429734297442975429764297742978429794298042981429824298342984429854298642987429884298942990429914299242993429944299542996429974299842999430004300143002430034300443005430064300743008430094301043011430124301343014430154301643017430184301943020430214
30224302343024430254302643027430284302943030430314303243033430344303543036430374303843039430404304143042430434304443045430464304743048430494305043051430524305343054430554305643057430584305943060430614306243063430644306543066430674306843069430704307143072430734307443075430764307743078430794308043081430824308343084430854308643087430884308943090430914309243093430944309543096430974309843099431004310143102431034310443105431064310743108431094311043111431124311343114431154311643117431184311943120431214312243123431244312543126431274312843129431304313143132431334313443135431364313743138431394314043141431424314343144431454314643147431484314943150431514315243153431544315543156431574315843159431604316143162431634316443165431664316743168431694317043171431724317343174431754317643177431784317943180431814318243183431844318543186431874318843189431904319143192431934319443195431964319743198431994320043201432024320343204432054320643207432084320943210432114321243213432144321543216432174321843219432204322143222432234322443225432264322743228432294323043231432324323343234432354323643237432384323943240432414324243243432444324543246432474324843249432504325143252432534325443255432564325743258432594326043261432624326343264432654326643267432684326943270432714327243273432744327543276432774327843279432804328143282432834328443285432864328743288432894329043291432924329343294432954329643297432984329943300433014330243303433044330543306433074330843309433104331143312433134331443315433164331743318433194332043321433224332343324433254332643327433284332943330433314333243333433344333543336433374333843339433404334143342433434334443345433464334743348433494335043351433524335343354433554335643357433584335943360433614336243363433644336543366433674336843369433704337143372433734337443375433764337743378433794338043381433824338343384433854338643387433884338943390433914339243393433944339543396433974339843399434004340143402434034340443405434064340743408434094341043411434124341343414434154341643417434184341943420434214
34224342343424434254342643427434284342943430434314343243433434344343543436434374343843439434404344143442434434344443445434464344743448434494345043451434524345343454434554345643457434584345943460434614346243463434644346543466434674346843469434704347143472434734347443475434764347743478434794348043481434824348343484434854348643487434884348943490434914349243493434944349543496434974349843499435004350143502435034350443505435064350743508435094351043511435124351343514435154351643517435184351943520435214352243523435244352543526435274352843529435304353143532435334353443535435364353743538435394354043541435424354343544435454354643547435484354943550435514355243553435544355543556435574355843559435604356143562435634356443565435664356743568435694357043571435724357343574435754357643577435784357943580435814358243583435844358543586435874358843589435904359143592435934359443595435964359743598435994360043601436024360343604436054360643607436084360943610436114361243613436144361543616436174361843619436204362143622436234362443625436264362743628436294363043631436324363343634436354363643637436384363943640436414364243643436444364543646436474364843649436504365143652436534365443655436564365743658436594366043661436624366343664436654366643667436684366943670436714367243673436744367543676436774367843679436804368143682436834368443685436864368743688436894369043691436924369343694436954369643697436984369943700437014370243703437044370543706437074370843709437104371143712437134371443715437164371743718437194372043721437224372343724437254372643727437284372943730437314373243733437344373543736437374373843739437404374143742437434374443745437464374743748437494375043751437524375343754437554375643757437584375943760437614376243763437644376543766437674376843769437704377143772437734377443775437764377743778437794378043781437824378343784437854378643787437884378943790437914379243793437944379543796437974379843799438004380143802438034380443805438064380743808438094381043811438124381343814438154381643817438184381943820438214
38224382343824438254382643827438284382943830438314383243833438344383543836438374383843839438404384143842438434384443845438464384743848438494385043851438524385343854438554385643857438584385943860438614386243863438644386543866438674386843869438704387143872438734387443875438764387743878438794388043881438824388343884438854388643887438884388943890438914389243893438944389543896438974389843899439004390143902439034390443905439064390743908439094391043911439124391343914439154391643917439184391943920439214392243923439244392543926439274392843929439304393143932439334393443935439364393743938439394394043941439424394343944439454394643947439484394943950439514395243953439544395543956439574395843959439604396143962439634396443965439664396743968439694397043971439724397343974439754397643977439784397943980439814398243983439844398543986439874398843989439904399143992439934399443995439964399743998439994400044001440024400344004440054400644007440084400944010440114401244013440144401544016440174401844019440204402144022440234402444025440264402744028440294403044031440324403344034440354403644037440384403944040440414404244043440444404544046440474404844049440504405144052440534405444055440564405744058440594406044061440624406344064440654406644067440684406944070440714407244073440744407544076440774407844079440804408144082440834408444085440864408744088440894409044091440924409344094440954409644097440984409944100441014410244103441044410544106441074410844109441104411144112441134411444115441164411744118441194412044121441224412344124441254412644127441284412944130441314413244133441344413544136441374413844139441404414144142441434414444145441464414744148441494415044151441524415344154441554415644157441584415944160441614416244163441644416544166441674416844169441704417144172441734417444175441764417744178441794418044181441824418344184441854418644187441884418944190441914419244193441944419544196441974419844199442004420144202442034420444205442064420744208442094421044211442124421344214442154421644217442184421944220442214
42224422344224442254422644227442284422944230442314423244233442344423544236442374423844239442404424144242442434424444245442464424744248442494425044251442524425344254442554425644257442584425944260442614426244263442644426544266442674426844269442704427144272442734427444275442764427744278442794428044281442824428344284442854428644287442884428944290442914429244293442944429544296442974429844299443004430144302443034430444305443064430744308443094431044311443124431344314443154431644317443184431944320443214432244323443244432544326443274432844329443304433144332443334433444335443364433744338443394434044341443424434344344443454434644347443484434944350443514435244353443544435544356443574435844359443604436144362443634436444365443664436744368443694437044371443724437344374443754437644377443784437944380443814438244383443844438544386443874438844389443904439144392443934439444395443964439744398443994440044401444024440344404444054440644407444084440944410444114441244413444144441544416444174441844419444204442144422444234442444425444264442744428444294443044431444324443344434444354443644437444384443944440444414444244443444444444544446444474444844449444504445144452444534445444455444564445744458444594446044461444624446344464444654446644467444684446944470444714447244473444744447544476444774447844479444804448144482444834448444485444864448744488444894449044491444924449344494444954449644497444984449944500445014450244503445044450544506445074450844509445104451144512445134451444515445164451744518445194452044521445224452344524445254452644527445284452944530445314453244533445344453544536445374453844539445404454144542445434454444545445464454744548445494455044551445524455344554445554455644557445584455944560445614456244563445644456544566445674456844569445704457144572445734457444575445764457744578445794458044581445824458344584445854458644587445884458944590445914459244593445944459544596445974459844599446004460144602446034460444605446064460744608446094461044611446124461344614446154461644617446184461944620446214
462244623446244462544626446274462844629446304463144632446334463444635446364463744638446394464044641446424464344644446454464644647446484464944650446514465244653446544465544656446574465844659446604466144662446634466444665446664466744668446694467044671446724467344674446754467644677446784467944680446814468244683446844468544686446874468844689446904469144692446934469444695
  1. /* sp_x86_64_asm
  2. *
  3. * Copyright (C) 2006-2020 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. #ifndef HAVE_INTEL_AVX1
  22. #define HAVE_INTEL_AVX1
  23. #endif /* HAVE_INTEL_AVX1 */
  24. #ifndef NO_AVX2_SUPPORT
  25. #define HAVE_INTEL_AVX2
  26. #endif /* NO_AVX2_SUPPORT */
  27. #ifndef WOLFSSL_SP_NO_2048
  28. #ifndef WOLFSSL_SP_NO_2048
  29. /* Read big endian unsigned byte array into r.
  30. * Uses the bswap instruction.
  31. *
  32. * r A single precision integer.
  33. * size Maximum number of bytes to convert
  34. * a Byte array.
  35. * n Number of bytes in array to read.
  36. */
  37. #ifndef __APPLE__
  38. .text
  39. .globl sp_2048_from_bin_bswap
  40. .type sp_2048_from_bin_bswap,@function
  41. .align 16
  42. sp_2048_from_bin_bswap:
  43. #else
  44. .section __TEXT,__text
  45. .globl _sp_2048_from_bin_bswap
  46. .p2align 4
  47. _sp_2048_from_bin_bswap:
  48. #endif /* __APPLE__ */
  49. movq %rdx, %r9
  50. movq %rdi, %r10
  51. addq %rcx, %r9
  52. addq $0x100, %r10
  53. xorq %r11, %r11
  54. jmp L_2048_from_bin_bswap_64_end
  55. L_2048_from_bin_bswap_64_start:
  56. subq $0x40, %r9
  57. movq 56(%r9), %rax
  58. movq 48(%r9), %r8
  59. bswapq %rax
  60. bswapq %r8
  61. movq %rax, (%rdi)
  62. movq %r8, 8(%rdi)
  63. movq 40(%r9), %rax
  64. movq 32(%r9), %r8
  65. bswapq %rax
  66. bswapq %r8
  67. movq %rax, 16(%rdi)
  68. movq %r8, 24(%rdi)
  69. movq 24(%r9), %rax
  70. movq 16(%r9), %r8
  71. bswapq %rax
  72. bswapq %r8
  73. movq %rax, 32(%rdi)
  74. movq %r8, 40(%rdi)
  75. movq 8(%r9), %rax
  76. movq (%r9), %r8
  77. bswapq %rax
  78. bswapq %r8
  79. movq %rax, 48(%rdi)
  80. movq %r8, 56(%rdi)
  81. addq $0x40, %rdi
  82. subq $0x40, %rcx
  83. L_2048_from_bin_bswap_64_end:
  84. cmpq $63, %rcx
  85. jg L_2048_from_bin_bswap_64_start
  86. jmp L_2048_from_bin_bswap_8_end
  87. L_2048_from_bin_bswap_8_start:
  88. subq $8, %r9
  89. movq (%r9), %rax
  90. bswapq %rax
  91. movq %rax, (%rdi)
  92. addq $8, %rdi
  93. subq $8, %rcx
  94. L_2048_from_bin_bswap_8_end:
  95. cmpq $7, %rcx
  96. jg L_2048_from_bin_bswap_8_start
  97. cmpq %r11, %rcx
  98. je L_2048_from_bin_bswap_hi_end
  99. movq %r11, %r8
  100. movq %r11, %rax
  101. L_2048_from_bin_bswap_hi_start:
  102. movb (%rdx), %al
  103. shlq $8, %r8
  104. incq %rdx
  105. addq %rax, %r8
  106. decq %rcx
  107. jg L_2048_from_bin_bswap_hi_start
  108. movq %r8, (%rdi)
  109. addq $8, %rdi
  110. L_2048_from_bin_bswap_hi_end:
  111. cmpq %r10, %rdi
  112. je L_2048_from_bin_bswap_zero_end
  113. L_2048_from_bin_bswap_zero_start:
  114. movq %r11, (%rdi)
  115. addq $8, %rdi
  116. cmpq %r10, %rdi
  117. jl L_2048_from_bin_bswap_zero_start
  118. L_2048_from_bin_bswap_zero_end:
  119. repz retq
  120. #ifndef __APPLE__
  121. .size sp_2048_from_bin_bswap,.-sp_2048_from_bin_bswap
  122. #endif /* __APPLE__ */
  123. /* Read big endian unsigned byte array into r.
  124. * Uses the movbe instruction which is an optional instruction.
  125. *
  126. * r A single precision integer.
  127. * size Maximum number of bytes to convert
  128. * a Byte array.
  129. * n Number of bytes in array to read.
  130. */
  131. #ifndef __APPLE__
  132. .text
  133. .globl sp_2048_from_bin_movbe
  134. .type sp_2048_from_bin_movbe,@function
  135. .align 16
  136. sp_2048_from_bin_movbe:
  137. #else
  138. .section __TEXT,__text
  139. .globl _sp_2048_from_bin_movbe
  140. .p2align 4
  141. _sp_2048_from_bin_movbe:
  142. #endif /* __APPLE__ */
  143. movq %rdx, %r9
  144. movq %rdi, %r10
  145. addq %rcx, %r9
  146. addq $0x100, %r10
  147. xorq %r11, %r11
  148. jmp L_2048_from_bin_movbe_64_end
  149. L_2048_from_bin_movbe_64_start:
  150. subq $0x40, %r9
  151. movbeq 56(%r9), %rax
  152. movbeq 48(%r9), %r8
  153. movq %rax, (%rdi)
  154. movq %r8, 8(%rdi)
  155. movbeq 40(%r9), %rax
  156. movbeq 32(%r9), %r8
  157. movq %rax, 16(%rdi)
  158. movq %r8, 24(%rdi)
  159. movbeq 24(%r9), %rax
  160. movbeq 16(%r9), %r8
  161. movq %rax, 32(%rdi)
  162. movq %r8, 40(%rdi)
  163. movbeq 8(%r9), %rax
  164. movbeq (%r9), %r8
  165. movq %rax, 48(%rdi)
  166. movq %r8, 56(%rdi)
  167. addq $0x40, %rdi
  168. subq $0x40, %rcx
  169. L_2048_from_bin_movbe_64_end:
  170. cmpq $63, %rcx
  171. jg L_2048_from_bin_movbe_64_start
  172. jmp L_2048_from_bin_movbe_8_end
  173. L_2048_from_bin_movbe_8_start:
  174. subq $8, %r9
  175. movbeq (%r9), %rax
  176. movq %rax, (%rdi)
  177. addq $8, %rdi
  178. subq $8, %rcx
  179. L_2048_from_bin_movbe_8_end:
  180. cmpq $7, %rcx
  181. jg L_2048_from_bin_movbe_8_start
  182. cmpq %r11, %rcx
  183. je L_2048_from_bin_movbe_hi_end
  184. movq %r11, %r8
  185. movq %r11, %rax
  186. L_2048_from_bin_movbe_hi_start:
  187. movb (%rdx), %al
  188. shlq $8, %r8
  189. incq %rdx
  190. addq %rax, %r8
  191. decq %rcx
  192. jg L_2048_from_bin_movbe_hi_start
  193. movq %r8, (%rdi)
  194. addq $8, %rdi
  195. L_2048_from_bin_movbe_hi_end:
  196. cmpq %r10, %rdi
  197. je L_2048_from_bin_movbe_zero_end
  198. L_2048_from_bin_movbe_zero_start:
  199. movq %r11, (%rdi)
  200. addq $8, %rdi
  201. cmpq %r10, %rdi
  202. jl L_2048_from_bin_movbe_zero_start
  203. L_2048_from_bin_movbe_zero_end:
  204. repz retq
  205. #ifndef __APPLE__
  206. .size sp_2048_from_bin_movbe,.-sp_2048_from_bin_movbe
  207. #endif /* __APPLE__ */
  208. /* Write r as big endian to byte array.
  209. * Fixed length number of bytes written: 256
  210. * Uses the bswap instruction.
  211. *
  212. * r A single precision integer.
  213. * a Byte array.
  214. */
  215. #ifndef __APPLE__
  216. .text
  217. .globl sp_2048_to_bin_bswap
  218. .type sp_2048_to_bin_bswap,@function
  219. .align 16
  220. sp_2048_to_bin_bswap:
  221. #else
  222. .section __TEXT,__text
  223. .globl _sp_2048_to_bin_bswap
  224. .p2align 4
  225. _sp_2048_to_bin_bswap:
  226. #endif /* __APPLE__ */
  227. movq 248(%rdi), %rdx
  228. movq 240(%rdi), %rax
  229. bswapq %rdx
  230. bswapq %rax
  231. movq %rdx, (%rsi)
  232. movq %rax, 8(%rsi)
  233. movq 232(%rdi), %rdx
  234. movq 224(%rdi), %rax
  235. bswapq %rdx
  236. bswapq %rax
  237. movq %rdx, 16(%rsi)
  238. movq %rax, 24(%rsi)
  239. movq 216(%rdi), %rdx
  240. movq 208(%rdi), %rax
  241. bswapq %rdx
  242. bswapq %rax
  243. movq %rdx, 32(%rsi)
  244. movq %rax, 40(%rsi)
  245. movq 200(%rdi), %rdx
  246. movq 192(%rdi), %rax
  247. bswapq %rdx
  248. bswapq %rax
  249. movq %rdx, 48(%rsi)
  250. movq %rax, 56(%rsi)
  251. movq 184(%rdi), %rdx
  252. movq 176(%rdi), %rax
  253. bswapq %rdx
  254. bswapq %rax
  255. movq %rdx, 64(%rsi)
  256. movq %rax, 72(%rsi)
  257. movq 168(%rdi), %rdx
  258. movq 160(%rdi), %rax
  259. bswapq %rdx
  260. bswapq %rax
  261. movq %rdx, 80(%rsi)
  262. movq %rax, 88(%rsi)
  263. movq 152(%rdi), %rdx
  264. movq 144(%rdi), %rax
  265. bswapq %rdx
  266. bswapq %rax
  267. movq %rdx, 96(%rsi)
  268. movq %rax, 104(%rsi)
  269. movq 136(%rdi), %rdx
  270. movq 128(%rdi), %rax
  271. bswapq %rdx
  272. bswapq %rax
  273. movq %rdx, 112(%rsi)
  274. movq %rax, 120(%rsi)
  275. movq 120(%rdi), %rdx
  276. movq 112(%rdi), %rax
  277. bswapq %rdx
  278. bswapq %rax
  279. movq %rdx, 128(%rsi)
  280. movq %rax, 136(%rsi)
  281. movq 104(%rdi), %rdx
  282. movq 96(%rdi), %rax
  283. bswapq %rdx
  284. bswapq %rax
  285. movq %rdx, 144(%rsi)
  286. movq %rax, 152(%rsi)
  287. movq 88(%rdi), %rdx
  288. movq 80(%rdi), %rax
  289. bswapq %rdx
  290. bswapq %rax
  291. movq %rdx, 160(%rsi)
  292. movq %rax, 168(%rsi)
  293. movq 72(%rdi), %rdx
  294. movq 64(%rdi), %rax
  295. bswapq %rdx
  296. bswapq %rax
  297. movq %rdx, 176(%rsi)
  298. movq %rax, 184(%rsi)
  299. movq 56(%rdi), %rdx
  300. movq 48(%rdi), %rax
  301. bswapq %rdx
  302. bswapq %rax
  303. movq %rdx, 192(%rsi)
  304. movq %rax, 200(%rsi)
  305. movq 40(%rdi), %rdx
  306. movq 32(%rdi), %rax
  307. bswapq %rdx
  308. bswapq %rax
  309. movq %rdx, 208(%rsi)
  310. movq %rax, 216(%rsi)
  311. movq 24(%rdi), %rdx
  312. movq 16(%rdi), %rax
  313. bswapq %rdx
  314. bswapq %rax
  315. movq %rdx, 224(%rsi)
  316. movq %rax, 232(%rsi)
  317. movq 8(%rdi), %rdx
  318. movq (%rdi), %rax
  319. bswapq %rdx
  320. bswapq %rax
  321. movq %rdx, 240(%rsi)
  322. movq %rax, 248(%rsi)
  323. repz retq
  324. #ifndef __APPLE__
  325. .size sp_2048_to_bin_bswap,.-sp_2048_to_bin_bswap
  326. #endif /* __APPLE__ */
  327. /* Write r as big endian to byte array.
  328. * Fixed length number of bytes written: 256
  329. * Uses the movbe instruction which is optional.
  330. *
  331. * r A single precision integer.
  332. * a Byte array.
  333. */
  334. #ifndef __APPLE__
  335. .text
  336. .globl sp_2048_to_bin_movbe
  337. .type sp_2048_to_bin_movbe,@function
  338. .align 16
  339. sp_2048_to_bin_movbe:
  340. #else
  341. .section __TEXT,__text
  342. .globl _sp_2048_to_bin_movbe
  343. .p2align 4
  344. _sp_2048_to_bin_movbe:
  345. #endif /* __APPLE__ */
  346. movbeq 248(%rdi), %rdx
  347. movbeq 240(%rdi), %rax
  348. movq %rdx, (%rsi)
  349. movq %rax, 8(%rsi)
  350. movbeq 232(%rdi), %rdx
  351. movbeq 224(%rdi), %rax
  352. movq %rdx, 16(%rsi)
  353. movq %rax, 24(%rsi)
  354. movbeq 216(%rdi), %rdx
  355. movbeq 208(%rdi), %rax
  356. movq %rdx, 32(%rsi)
  357. movq %rax, 40(%rsi)
  358. movbeq 200(%rdi), %rdx
  359. movbeq 192(%rdi), %rax
  360. movq %rdx, 48(%rsi)
  361. movq %rax, 56(%rsi)
  362. movbeq 184(%rdi), %rdx
  363. movbeq 176(%rdi), %rax
  364. movq %rdx, 64(%rsi)
  365. movq %rax, 72(%rsi)
  366. movbeq 168(%rdi), %rdx
  367. movbeq 160(%rdi), %rax
  368. movq %rdx, 80(%rsi)
  369. movq %rax, 88(%rsi)
  370. movbeq 152(%rdi), %rdx
  371. movbeq 144(%rdi), %rax
  372. movq %rdx, 96(%rsi)
  373. movq %rax, 104(%rsi)
  374. movbeq 136(%rdi), %rdx
  375. movbeq 128(%rdi), %rax
  376. movq %rdx, 112(%rsi)
  377. movq %rax, 120(%rsi)
  378. movbeq 120(%rdi), %rdx
  379. movbeq 112(%rdi), %rax
  380. movq %rdx, 128(%rsi)
  381. movq %rax, 136(%rsi)
  382. movbeq 104(%rdi), %rdx
  383. movbeq 96(%rdi), %rax
  384. movq %rdx, 144(%rsi)
  385. movq %rax, 152(%rsi)
  386. movbeq 88(%rdi), %rdx
  387. movbeq 80(%rdi), %rax
  388. movq %rdx, 160(%rsi)
  389. movq %rax, 168(%rsi)
  390. movbeq 72(%rdi), %rdx
  391. movbeq 64(%rdi), %rax
  392. movq %rdx, 176(%rsi)
  393. movq %rax, 184(%rsi)
  394. movbeq 56(%rdi), %rdx
  395. movbeq 48(%rdi), %rax
  396. movq %rdx, 192(%rsi)
  397. movq %rax, 200(%rsi)
  398. movbeq 40(%rdi), %rdx
  399. movbeq 32(%rdi), %rax
  400. movq %rdx, 208(%rsi)
  401. movq %rax, 216(%rsi)
  402. movbeq 24(%rdi), %rdx
  403. movbeq 16(%rdi), %rax
  404. movq %rdx, 224(%rsi)
  405. movq %rax, 232(%rsi)
  406. movbeq 8(%rdi), %rdx
  407. movbeq (%rdi), %rax
  408. movq %rdx, 240(%rsi)
  409. movq %rax, 248(%rsi)
  410. repz retq
  411. #ifndef __APPLE__
  412. .size sp_2048_to_bin_movbe,.-sp_2048_to_bin_movbe
  413. #endif /* __APPLE__ */
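  414. /* Multiply a and b into r. (r = a * b)
  415. *
  416. * r A single precision integer; receives the product (stored through %rdi).
  417. * a A single precision integer; words A[0]..A[15] are read through %rsi.
  418. * b A single precision integer; words B[0]..B[15] are read through %rcx (copied from %rdx).
  419. */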
  420. #ifndef __APPLE__
  421. .text
  422. .globl sp_2048_mul_16
  423. .type sp_2048_mul_16,@function
  424. .align 16
  425. sp_2048_mul_16:
  426. #else
  427. .section __TEXT,__text
  428. .globl _sp_2048_mul_16
  429. .p2align 4
  430. _sp_2048_mul_16:
  431. #endif /* __APPLE__ */
  432. movq %rdx, %rcx
  433. subq $0x80, %rsp
  434. # A[0] * B[0]
  435. movq (%rcx), %rax
  436. mulq (%rsi)
  437. xorq %r10, %r10
  438. movq %rax, (%rsp)
  439. movq %rdx, %r9
  440. # A[0] * B[1]
  441. movq 8(%rcx), %rax
  442. mulq (%rsi)
  443. xorq %r8, %r8
  444. addq %rax, %r9
  445. adcq %rdx, %r10
  446. adcq $0x00, %r8
  447. # A[1] * B[0]
  448. movq (%rcx), %rax
  449. mulq 8(%rsi)
  450. addq %rax, %r9
  451. adcq %rdx, %r10
  452. adcq $0x00, %r8
  453. movq %r9, 8(%rsp)
  454. # A[0] * B[2]
  455. movq 16(%rcx), %rax
  456. mulq (%rsi)
  457. xorq %r9, %r9
  458. addq %rax, %r10
  459. adcq %rdx, %r8
  460. adcq $0x00, %r9
  461. # A[1] * B[1]
  462. movq 8(%rcx), %rax
  463. mulq 8(%rsi)
  464. addq %rax, %r10
  465. adcq %rdx, %r8
  466. adcq $0x00, %r9
  467. # A[2] * B[0]
  468. movq (%rcx), %rax
  469. mulq 16(%rsi)
  470. addq %rax, %r10
  471. adcq %rdx, %r8
  472. adcq $0x00, %r9
  473. movq %r10, 16(%rsp)
  474. # A[0] * B[3]
  475. movq 24(%rcx), %rax
  476. mulq (%rsi)
  477. xorq %r10, %r10
  478. addq %rax, %r8
  479. adcq %rdx, %r9
  480. adcq $0x00, %r10
  481. # A[1] * B[2]
  482. movq 16(%rcx), %rax
  483. mulq 8(%rsi)
  484. addq %rax, %r8
  485. adcq %rdx, %r9
  486. adcq $0x00, %r10
  487. # A[2] * B[1]
  488. movq 8(%rcx), %rax
  489. mulq 16(%rsi)
  490. addq %rax, %r8
  491. adcq %rdx, %r9
  492. adcq $0x00, %r10
  493. # A[3] * B[0]
  494. movq (%rcx), %rax
  495. mulq 24(%rsi)
  496. addq %rax, %r8
  497. adcq %rdx, %r9
  498. adcq $0x00, %r10
  499. movq %r8, 24(%rsp)
  500. # A[0] * B[4]
  501. movq 32(%rcx), %rax
  502. mulq (%rsi)
  503. xorq %r8, %r8
  504. addq %rax, %r9
  505. adcq %rdx, %r10
  506. adcq $0x00, %r8
  507. # A[1] * B[3]
  508. movq 24(%rcx), %rax
  509. mulq 8(%rsi)
  510. addq %rax, %r9
  511. adcq %rdx, %r10
  512. adcq $0x00, %r8
  513. # A[2] * B[2]
  514. movq 16(%rcx), %rax
  515. mulq 16(%rsi)
  516. addq %rax, %r9
  517. adcq %rdx, %r10
  518. adcq $0x00, %r8
  519. # A[3] * B[1]
  520. movq 8(%rcx), %rax
  521. mulq 24(%rsi)
  522. addq %rax, %r9
  523. adcq %rdx, %r10
  524. adcq $0x00, %r8
  525. # A[4] * B[0]
  526. movq (%rcx), %rax
  527. mulq 32(%rsi)
  528. addq %rax, %r9
  529. adcq %rdx, %r10
  530. adcq $0x00, %r8
  531. movq %r9, 32(%rsp)
  532. # A[0] * B[5]
  533. movq 40(%rcx), %rax
  534. mulq (%rsi)
  535. xorq %r9, %r9
  536. addq %rax, %r10
  537. adcq %rdx, %r8
  538. adcq $0x00, %r9
  539. # A[1] * B[4]
  540. movq 32(%rcx), %rax
  541. mulq 8(%rsi)
  542. addq %rax, %r10
  543. adcq %rdx, %r8
  544. adcq $0x00, %r9
  545. # A[2] * B[3]
  546. movq 24(%rcx), %rax
  547. mulq 16(%rsi)
  548. addq %rax, %r10
  549. adcq %rdx, %r8
  550. adcq $0x00, %r9
  551. # A[3] * B[2]
  552. movq 16(%rcx), %rax
  553. mulq 24(%rsi)
  554. addq %rax, %r10
  555. adcq %rdx, %r8
  556. adcq $0x00, %r9
  557. # A[4] * B[1]
  558. movq 8(%rcx), %rax
  559. mulq 32(%rsi)
  560. addq %rax, %r10
  561. adcq %rdx, %r8
  562. adcq $0x00, %r9
  563. # A[5] * B[0]
  564. movq (%rcx), %rax
  565. mulq 40(%rsi)
  566. addq %rax, %r10
  567. adcq %rdx, %r8
  568. adcq $0x00, %r9
  569. movq %r10, 40(%rsp)
  570. # A[0] * B[6]
  571. movq 48(%rcx), %rax
  572. mulq (%rsi)
  573. xorq %r10, %r10
  574. addq %rax, %r8
  575. adcq %rdx, %r9
  576. adcq $0x00, %r10
  577. # A[1] * B[5]
  578. movq 40(%rcx), %rax
  579. mulq 8(%rsi)
  580. addq %rax, %r8
  581. adcq %rdx, %r9
  582. adcq $0x00, %r10
  583. # A[2] * B[4]
  584. movq 32(%rcx), %rax
  585. mulq 16(%rsi)
  586. addq %rax, %r8
  587. adcq %rdx, %r9
  588. adcq $0x00, %r10
  589. # A[3] * B[3]
  590. movq 24(%rcx), %rax
  591. mulq 24(%rsi)
  592. addq %rax, %r8
  593. adcq %rdx, %r9
  594. adcq $0x00, %r10
  595. # A[4] * B[2]
  596. movq 16(%rcx), %rax
  597. mulq 32(%rsi)
  598. addq %rax, %r8
  599. adcq %rdx, %r9
  600. adcq $0x00, %r10
  601. # A[5] * B[1]
  602. movq 8(%rcx), %rax
  603. mulq 40(%rsi)
  604. addq %rax, %r8
  605. adcq %rdx, %r9
  606. adcq $0x00, %r10
  607. # A[6] * B[0]
  608. movq (%rcx), %rax
  609. mulq 48(%rsi)
  610. addq %rax, %r8
  611. adcq %rdx, %r9
  612. adcq $0x00, %r10
  613. movq %r8, 48(%rsp)
  614. # A[0] * B[7]
  615. movq 56(%rcx), %rax
  616. mulq (%rsi)
  617. xorq %r8, %r8
  618. addq %rax, %r9
  619. adcq %rdx, %r10
  620. adcq $0x00, %r8
  621. # A[1] * B[6]
  622. movq 48(%rcx), %rax
  623. mulq 8(%rsi)
  624. addq %rax, %r9
  625. adcq %rdx, %r10
  626. adcq $0x00, %r8
  627. # A[2] * B[5]
  628. movq 40(%rcx), %rax
  629. mulq 16(%rsi)
  630. addq %rax, %r9
  631. adcq %rdx, %r10
  632. adcq $0x00, %r8
  633. # A[3] * B[4]
  634. movq 32(%rcx), %rax
  635. mulq 24(%rsi)
  636. addq %rax, %r9
  637. adcq %rdx, %r10
  638. adcq $0x00, %r8
  639. # A[4] * B[3]
  640. movq 24(%rcx), %rax
  641. mulq 32(%rsi)
  642. addq %rax, %r9
  643. adcq %rdx, %r10
  644. adcq $0x00, %r8
  645. # A[5] * B[2]
  646. movq 16(%rcx), %rax
  647. mulq 40(%rsi)
  648. addq %rax, %r9
  649. adcq %rdx, %r10
  650. adcq $0x00, %r8
  651. # A[6] * B[1]
  652. movq 8(%rcx), %rax
  653. mulq 48(%rsi)
  654. addq %rax, %r9
  655. adcq %rdx, %r10
  656. adcq $0x00, %r8
  657. # A[7] * B[0]
  658. movq (%rcx), %rax
  659. mulq 56(%rsi)
  660. addq %rax, %r9
  661. adcq %rdx, %r10
  662. adcq $0x00, %r8
  663. movq %r9, 56(%rsp)
  664. # A[0] * B[8]
  665. movq 64(%rcx), %rax
  666. mulq (%rsi)
  667. xorq %r9, %r9
  668. addq %rax, %r10
  669. adcq %rdx, %r8
  670. adcq $0x00, %r9
  671. # A[1] * B[7]
  672. movq 56(%rcx), %rax
  673. mulq 8(%rsi)
  674. addq %rax, %r10
  675. adcq %rdx, %r8
  676. adcq $0x00, %r9
  677. # A[2] * B[6]
  678. movq 48(%rcx), %rax
  679. mulq 16(%rsi)
  680. addq %rax, %r10
  681. adcq %rdx, %r8
  682. adcq $0x00, %r9
  683. # A[3] * B[5]
  684. movq 40(%rcx), %rax
  685. mulq 24(%rsi)
  686. addq %rax, %r10
  687. adcq %rdx, %r8
  688. adcq $0x00, %r9
  689. # A[4] * B[4]
  690. movq 32(%rcx), %rax
  691. mulq 32(%rsi)
  692. addq %rax, %r10
  693. adcq %rdx, %r8
  694. adcq $0x00, %r9
  695. # A[5] * B[3]
  696. movq 24(%rcx), %rax
  697. mulq 40(%rsi)
  698. addq %rax, %r10
  699. adcq %rdx, %r8
  700. adcq $0x00, %r9
  701. # A[6] * B[2]
  702. movq 16(%rcx), %rax
  703. mulq 48(%rsi)
  704. addq %rax, %r10
  705. adcq %rdx, %r8
  706. adcq $0x00, %r9
  707. # A[7] * B[1]
  708. movq 8(%rcx), %rax
  709. mulq 56(%rsi)
  710. addq %rax, %r10
  711. adcq %rdx, %r8
  712. adcq $0x00, %r9
  713. # A[8] * B[0]
  714. movq (%rcx), %rax
  715. mulq 64(%rsi)
  716. addq %rax, %r10
  717. adcq %rdx, %r8
  718. adcq $0x00, %r9
  719. movq %r10, 64(%rsp)
  720. # A[0] * B[9]
  721. movq 72(%rcx), %rax
  722. mulq (%rsi)
  723. xorq %r10, %r10
  724. addq %rax, %r8
  725. adcq %rdx, %r9
  726. adcq $0x00, %r10
  727. # A[1] * B[8]
  728. movq 64(%rcx), %rax
  729. mulq 8(%rsi)
  730. addq %rax, %r8
  731. adcq %rdx, %r9
  732. adcq $0x00, %r10
  733. # A[2] * B[7]
  734. movq 56(%rcx), %rax
  735. mulq 16(%rsi)
  736. addq %rax, %r8
  737. adcq %rdx, %r9
  738. adcq $0x00, %r10
  739. # A[3] * B[6]
  740. movq 48(%rcx), %rax
  741. mulq 24(%rsi)
  742. addq %rax, %r8
  743. adcq %rdx, %r9
  744. adcq $0x00, %r10
  745. # A[4] * B[5]
  746. movq 40(%rcx), %rax
  747. mulq 32(%rsi)
  748. addq %rax, %r8
  749. adcq %rdx, %r9
  750. adcq $0x00, %r10
  751. # A[5] * B[4]
  752. movq 32(%rcx), %rax
  753. mulq 40(%rsi)
  754. addq %rax, %r8
  755. adcq %rdx, %r9
  756. adcq $0x00, %r10
  757. # A[6] * B[3]
  758. movq 24(%rcx), %rax
  759. mulq 48(%rsi)
  760. addq %rax, %r8
  761. adcq %rdx, %r9
  762. adcq $0x00, %r10
  763. # A[7] * B[2]
  764. movq 16(%rcx), %rax
  765. mulq 56(%rsi)
  766. addq %rax, %r8
  767. adcq %rdx, %r9
  768. adcq $0x00, %r10
  769. # A[8] * B[1]
  770. movq 8(%rcx), %rax
  771. mulq 64(%rsi)
  772. addq %rax, %r8
  773. adcq %rdx, %r9
  774. adcq $0x00, %r10
  775. # A[9] * B[0]
  776. movq (%rcx), %rax
  777. mulq 72(%rsi)
  778. addq %rax, %r8
  779. adcq %rdx, %r9
  780. adcq $0x00, %r10
  781. movq %r8, 72(%rsp)
  782. # A[0] * B[10]
  783. movq 80(%rcx), %rax
  784. mulq (%rsi)
  785. xorq %r8, %r8
  786. addq %rax, %r9
  787. adcq %rdx, %r10
  788. adcq $0x00, %r8
  789. # A[1] * B[9]
  790. movq 72(%rcx), %rax
  791. mulq 8(%rsi)
  792. addq %rax, %r9
  793. adcq %rdx, %r10
  794. adcq $0x00, %r8
  795. # A[2] * B[8]
  796. movq 64(%rcx), %rax
  797. mulq 16(%rsi)
  798. addq %rax, %r9
  799. adcq %rdx, %r10
  800. adcq $0x00, %r8
  801. # A[3] * B[7]
  802. movq 56(%rcx), %rax
  803. mulq 24(%rsi)
  804. addq %rax, %r9
  805. adcq %rdx, %r10
  806. adcq $0x00, %r8
  807. # A[4] * B[6]
  808. movq 48(%rcx), %rax
  809. mulq 32(%rsi)
  810. addq %rax, %r9
  811. adcq %rdx, %r10
  812. adcq $0x00, %r8
  813. # A[5] * B[5]
  814. movq 40(%rcx), %rax
  815. mulq 40(%rsi)
  816. addq %rax, %r9
  817. adcq %rdx, %r10
  818. adcq $0x00, %r8
  819. # A[6] * B[4]
  820. movq 32(%rcx), %rax
  821. mulq 48(%rsi)
  822. addq %rax, %r9
  823. adcq %rdx, %r10
  824. adcq $0x00, %r8
  825. # A[7] * B[3]
  826. movq 24(%rcx), %rax
  827. mulq 56(%rsi)
  828. addq %rax, %r9
  829. adcq %rdx, %r10
  830. adcq $0x00, %r8
  831. # A[8] * B[2]
  832. movq 16(%rcx), %rax
  833. mulq 64(%rsi)
  834. addq %rax, %r9
  835. adcq %rdx, %r10
  836. adcq $0x00, %r8
  837. # A[9] * B[1]
  838. movq 8(%rcx), %rax
  839. mulq 72(%rsi)
  840. addq %rax, %r9
  841. adcq %rdx, %r10
  842. adcq $0x00, %r8
  843. # A[10] * B[0]
  844. movq (%rcx), %rax
  845. mulq 80(%rsi)
  846. addq %rax, %r9
  847. adcq %rdx, %r10
  848. adcq $0x00, %r8
  849. movq %r9, 80(%rsp)
  850. # A[0] * B[11]
  851. movq 88(%rcx), %rax
  852. mulq (%rsi)
  853. xorq %r9, %r9
  854. addq %rax, %r10
  855. adcq %rdx, %r8
  856. adcq $0x00, %r9
  857. # A[1] * B[10]
  858. movq 80(%rcx), %rax
  859. mulq 8(%rsi)
  860. addq %rax, %r10
  861. adcq %rdx, %r8
  862. adcq $0x00, %r9
  863. # A[2] * B[9]
  864. movq 72(%rcx), %rax
  865. mulq 16(%rsi)
  866. addq %rax, %r10
  867. adcq %rdx, %r8
  868. adcq $0x00, %r9
  869. # A[3] * B[8]
  870. movq 64(%rcx), %rax
  871. mulq 24(%rsi)
  872. addq %rax, %r10
  873. adcq %rdx, %r8
  874. adcq $0x00, %r9
  875. # A[4] * B[7]
  876. movq 56(%rcx), %rax
  877. mulq 32(%rsi)
  878. addq %rax, %r10
  879. adcq %rdx, %r8
  880. adcq $0x00, %r9
  881. # A[5] * B[6]
  882. movq 48(%rcx), %rax
  883. mulq 40(%rsi)
  884. addq %rax, %r10
  885. adcq %rdx, %r8
  886. adcq $0x00, %r9
  887. # A[6] * B[5]
  888. movq 40(%rcx), %rax
  889. mulq 48(%rsi)
  890. addq %rax, %r10
  891. adcq %rdx, %r8
  892. adcq $0x00, %r9
  893. # A[7] * B[4]
  894. movq 32(%rcx), %rax
  895. mulq 56(%rsi)
  896. addq %rax, %r10
  897. adcq %rdx, %r8
  898. adcq $0x00, %r9
  899. # A[8] * B[3]
  900. movq 24(%rcx), %rax
  901. mulq 64(%rsi)
  902. addq %rax, %r10
  903. adcq %rdx, %r8
  904. adcq $0x00, %r9
  905. # A[9] * B[2]
  906. movq 16(%rcx), %rax
  907. mulq 72(%rsi)
  908. addq %rax, %r10
  909. adcq %rdx, %r8
  910. adcq $0x00, %r9
  911. # A[10] * B[1]
  912. movq 8(%rcx), %rax
  913. mulq 80(%rsi)
  914. addq %rax, %r10
  915. adcq %rdx, %r8
  916. adcq $0x00, %r9
  917. # A[11] * B[0]
  918. movq (%rcx), %rax
  919. mulq 88(%rsi)
  920. addq %rax, %r10
  921. adcq %rdx, %r8
  922. adcq $0x00, %r9
  923. movq %r10, 88(%rsp)
  924. # A[0] * B[12]
  925. movq 96(%rcx), %rax
  926. mulq (%rsi)
  927. xorq %r10, %r10
  928. addq %rax, %r8
  929. adcq %rdx, %r9
  930. adcq $0x00, %r10
  931. # A[1] * B[11]
  932. movq 88(%rcx), %rax
  933. mulq 8(%rsi)
  934. addq %rax, %r8
  935. adcq %rdx, %r9
  936. adcq $0x00, %r10
  937. # A[2] * B[10]
  938. movq 80(%rcx), %rax
  939. mulq 16(%rsi)
  940. addq %rax, %r8
  941. adcq %rdx, %r9
  942. adcq $0x00, %r10
  943. # A[3] * B[9]
  944. movq 72(%rcx), %rax
  945. mulq 24(%rsi)
  946. addq %rax, %r8
  947. adcq %rdx, %r9
  948. adcq $0x00, %r10
  949. # A[4] * B[8]
  950. movq 64(%rcx), %rax
  951. mulq 32(%rsi)
  952. addq %rax, %r8
  953. adcq %rdx, %r9
  954. adcq $0x00, %r10
  955. # A[5] * B[7]
  956. movq 56(%rcx), %rax
  957. mulq 40(%rsi)
  958. addq %rax, %r8
  959. adcq %rdx, %r9
  960. adcq $0x00, %r10
  961. # A[6] * B[6]
  962. movq 48(%rcx), %rax
  963. mulq 48(%rsi)
  964. addq %rax, %r8
  965. adcq %rdx, %r9
  966. adcq $0x00, %r10
  967. # A[7] * B[5]
  968. movq 40(%rcx), %rax
  969. mulq 56(%rsi)
  970. addq %rax, %r8
  971. adcq %rdx, %r9
  972. adcq $0x00, %r10
  973. # A[8] * B[4]
  974. movq 32(%rcx), %rax
  975. mulq 64(%rsi)
  976. addq %rax, %r8
  977. adcq %rdx, %r9
  978. adcq $0x00, %r10
  979. # A[9] * B[3]
  980. movq 24(%rcx), %rax
  981. mulq 72(%rsi)
  982. addq %rax, %r8
  983. adcq %rdx, %r9
  984. adcq $0x00, %r10
  985. # A[10] * B[2]
  986. movq 16(%rcx), %rax
  987. mulq 80(%rsi)
  988. addq %rax, %r8
  989. adcq %rdx, %r9
  990. adcq $0x00, %r10
  991. # A[11] * B[1]
  992. movq 8(%rcx), %rax
  993. mulq 88(%rsi)
  994. addq %rax, %r8
  995. adcq %rdx, %r9
  996. adcq $0x00, %r10
  997. # A[12] * B[0]
  998. movq (%rcx), %rax
  999. mulq 96(%rsi)
  1000. addq %rax, %r8
  1001. adcq %rdx, %r9
  1002. adcq $0x00, %r10
  1003. movq %r8, 96(%rsp)
  1004. # A[0] * B[13]
  1005. movq 104(%rcx), %rax
  1006. mulq (%rsi)
  1007. xorq %r8, %r8
  1008. addq %rax, %r9
  1009. adcq %rdx, %r10
  1010. adcq $0x00, %r8
  1011. # A[1] * B[12]
  1012. movq 96(%rcx), %rax
  1013. mulq 8(%rsi)
  1014. addq %rax, %r9
  1015. adcq %rdx, %r10
  1016. adcq $0x00, %r8
  1017. # A[2] * B[11]
  1018. movq 88(%rcx), %rax
  1019. mulq 16(%rsi)
  1020. addq %rax, %r9
  1021. adcq %rdx, %r10
  1022. adcq $0x00, %r8
  1023. # A[3] * B[10]
  1024. movq 80(%rcx), %rax
  1025. mulq 24(%rsi)
  1026. addq %rax, %r9
  1027. adcq %rdx, %r10
  1028. adcq $0x00, %r8
  1029. # A[4] * B[9]
  1030. movq 72(%rcx), %rax
  1031. mulq 32(%rsi)
  1032. addq %rax, %r9
  1033. adcq %rdx, %r10
  1034. adcq $0x00, %r8
  1035. # A[5] * B[8]
  1036. movq 64(%rcx), %rax
  1037. mulq 40(%rsi)
  1038. addq %rax, %r9
  1039. adcq %rdx, %r10
  1040. adcq $0x00, %r8
  1041. # A[6] * B[7]
  1042. movq 56(%rcx), %rax
  1043. mulq 48(%rsi)
  1044. addq %rax, %r9
  1045. adcq %rdx, %r10
  1046. adcq $0x00, %r8
  1047. # A[7] * B[6]
  1048. movq 48(%rcx), %rax
  1049. mulq 56(%rsi)
  1050. addq %rax, %r9
  1051. adcq %rdx, %r10
  1052. adcq $0x00, %r8
  1053. # A[8] * B[5]
  1054. movq 40(%rcx), %rax
  1055. mulq 64(%rsi)
  1056. addq %rax, %r9
  1057. adcq %rdx, %r10
  1058. adcq $0x00, %r8
  1059. # A[9] * B[4]
  1060. movq 32(%rcx), %rax
  1061. mulq 72(%rsi)
  1062. addq %rax, %r9
  1063. adcq %rdx, %r10
  1064. adcq $0x00, %r8
  1065. # A[10] * B[3]
  1066. movq 24(%rcx), %rax
  1067. mulq 80(%rsi)
  1068. addq %rax, %r9
  1069. adcq %rdx, %r10
  1070. adcq $0x00, %r8
  1071. # A[11] * B[2]
  1072. movq 16(%rcx), %rax
  1073. mulq 88(%rsi)
  1074. addq %rax, %r9
  1075. adcq %rdx, %r10
  1076. adcq $0x00, %r8
  1077. # A[12] * B[1]
  1078. movq 8(%rcx), %rax
  1079. mulq 96(%rsi)
  1080. addq %rax, %r9
  1081. adcq %rdx, %r10
  1082. adcq $0x00, %r8
  1083. # A[13] * B[0]
  1084. movq (%rcx), %rax
  1085. mulq 104(%rsi)
  1086. addq %rax, %r9
  1087. adcq %rdx, %r10
  1088. adcq $0x00, %r8
  1089. movq %r9, 104(%rsp)
  1090. # A[0] * B[14]
  1091. movq 112(%rcx), %rax
  1092. mulq (%rsi)
  1093. xorq %r9, %r9
  1094. addq %rax, %r10
  1095. adcq %rdx, %r8
  1096. adcq $0x00, %r9
  1097. # A[1] * B[13]
  1098. movq 104(%rcx), %rax
  1099. mulq 8(%rsi)
  1100. addq %rax, %r10
  1101. adcq %rdx, %r8
  1102. adcq $0x00, %r9
  1103. # A[2] * B[12]
  1104. movq 96(%rcx), %rax
  1105. mulq 16(%rsi)
  1106. addq %rax, %r10
  1107. adcq %rdx, %r8
  1108. adcq $0x00, %r9
  1109. # A[3] * B[11]
  1110. movq 88(%rcx), %rax
  1111. mulq 24(%rsi)
  1112. addq %rax, %r10
  1113. adcq %rdx, %r8
  1114. adcq $0x00, %r9
  1115. # A[4] * B[10]
  1116. movq 80(%rcx), %rax
  1117. mulq 32(%rsi)
  1118. addq %rax, %r10
  1119. adcq %rdx, %r8
  1120. adcq $0x00, %r9
  1121. # A[5] * B[9]
  1122. movq 72(%rcx), %rax
  1123. mulq 40(%rsi)
  1124. addq %rax, %r10
  1125. adcq %rdx, %r8
  1126. adcq $0x00, %r9
  1127. # A[6] * B[8]
  1128. movq 64(%rcx), %rax
  1129. mulq 48(%rsi)
  1130. addq %rax, %r10
  1131. adcq %rdx, %r8
  1132. adcq $0x00, %r9
  1133. # A[7] * B[7]
  1134. movq 56(%rcx), %rax
  1135. mulq 56(%rsi)
  1136. addq %rax, %r10
  1137. adcq %rdx, %r8
  1138. adcq $0x00, %r9
  1139. # A[8] * B[6]
  1140. movq 48(%rcx), %rax
  1141. mulq 64(%rsi)
  1142. addq %rax, %r10
  1143. adcq %rdx, %r8
  1144. adcq $0x00, %r9
  1145. # A[9] * B[5]
  1146. movq 40(%rcx), %rax
  1147. mulq 72(%rsi)
  1148. addq %rax, %r10
  1149. adcq %rdx, %r8
  1150. adcq $0x00, %r9
  1151. # A[10] * B[4]
  1152. movq 32(%rcx), %rax
  1153. mulq 80(%rsi)
  1154. addq %rax, %r10
  1155. adcq %rdx, %r8
  1156. adcq $0x00, %r9
  1157. # A[11] * B[3]
  1158. movq 24(%rcx), %rax
  1159. mulq 88(%rsi)
  1160. addq %rax, %r10
  1161. adcq %rdx, %r8
  1162. adcq $0x00, %r9
  1163. # A[12] * B[2]
  1164. movq 16(%rcx), %rax
  1165. mulq 96(%rsi)
  1166. addq %rax, %r10
  1167. adcq %rdx, %r8
  1168. adcq $0x00, %r9
  1169. # A[13] * B[1]
  1170. movq 8(%rcx), %rax
  1171. mulq 104(%rsi)
  1172. addq %rax, %r10
  1173. adcq %rdx, %r8
  1174. adcq $0x00, %r9
  1175. # A[14] * B[0]
  1176. movq (%rcx), %rax
  1177. mulq 112(%rsi)
  1178. addq %rax, %r10
  1179. adcq %rdx, %r8
  1180. adcq $0x00, %r9
  1181. movq %r10, 112(%rsp)
  1182. # A[0] * B[15]
  1183. movq 120(%rcx), %rax
  1184. mulq (%rsi)
  1185. xorq %r10, %r10
  1186. addq %rax, %r8
  1187. adcq %rdx, %r9
  1188. adcq $0x00, %r10
  1189. # A[1] * B[14]
  1190. movq 112(%rcx), %rax
  1191. mulq 8(%rsi)
  1192. addq %rax, %r8
  1193. adcq %rdx, %r9
  1194. adcq $0x00, %r10
  1195. # A[2] * B[13]
  1196. movq 104(%rcx), %rax
  1197. mulq 16(%rsi)
  1198. addq %rax, %r8
  1199. adcq %rdx, %r9
  1200. adcq $0x00, %r10
  1201. # A[3] * B[12]
  1202. movq 96(%rcx), %rax
  1203. mulq 24(%rsi)
  1204. addq %rax, %r8
  1205. adcq %rdx, %r9
  1206. adcq $0x00, %r10
  1207. # A[4] * B[11]
  1208. movq 88(%rcx), %rax
  1209. mulq 32(%rsi)
  1210. addq %rax, %r8
  1211. adcq %rdx, %r9
  1212. adcq $0x00, %r10
  1213. # A[5] * B[10]
  1214. movq 80(%rcx), %rax
  1215. mulq 40(%rsi)
  1216. addq %rax, %r8
  1217. adcq %rdx, %r9
  1218. adcq $0x00, %r10
  1219. # A[6] * B[9]
  1220. movq 72(%rcx), %rax
  1221. mulq 48(%rsi)
  1222. addq %rax, %r8
  1223. adcq %rdx, %r9
  1224. adcq $0x00, %r10
  1225. # A[7] * B[8]
  1226. movq 64(%rcx), %rax
  1227. mulq 56(%rsi)
  1228. addq %rax, %r8
  1229. adcq %rdx, %r9
  1230. adcq $0x00, %r10
  1231. # A[8] * B[7]
  1232. movq 56(%rcx), %rax
  1233. mulq 64(%rsi)
  1234. addq %rax, %r8
  1235. adcq %rdx, %r9
  1236. adcq $0x00, %r10
  1237. # A[9] * B[6]
  1238. movq 48(%rcx), %rax
  1239. mulq 72(%rsi)
  1240. addq %rax, %r8
  1241. adcq %rdx, %r9
  1242. adcq $0x00, %r10
  1243. # A[10] * B[5]
  1244. movq 40(%rcx), %rax
  1245. mulq 80(%rsi)
  1246. addq %rax, %r8
  1247. adcq %rdx, %r9
  1248. adcq $0x00, %r10
  1249. # A[11] * B[4]
  1250. movq 32(%rcx), %rax
  1251. mulq 88(%rsi)
  1252. addq %rax, %r8
  1253. adcq %rdx, %r9
  1254. adcq $0x00, %r10
  1255. # A[12] * B[3]
  1256. movq 24(%rcx), %rax
  1257. mulq 96(%rsi)
  1258. addq %rax, %r8
  1259. adcq %rdx, %r9
  1260. adcq $0x00, %r10
  1261. # A[13] * B[2]
  1262. movq 16(%rcx), %rax
  1263. mulq 104(%rsi)
  1264. addq %rax, %r8
  1265. adcq %rdx, %r9
  1266. adcq $0x00, %r10
  1267. # A[14] * B[1]
  1268. movq 8(%rcx), %rax
  1269. mulq 112(%rsi)
  1270. addq %rax, %r8
  1271. adcq %rdx, %r9
  1272. adcq $0x00, %r10
  1273. # A[15] * B[0]
  1274. movq (%rcx), %rax
  1275. mulq 120(%rsi)
  1276. addq %rax, %r8
  1277. adcq %rdx, %r9
  1278. adcq $0x00, %r10
  1279. movq %r8, 120(%rsp)
  1280. # A[1] * B[15]
  1281. movq 120(%rcx), %rax
  1282. mulq 8(%rsi)
  1283. xorq %r8, %r8
  1284. addq %rax, %r9
  1285. adcq %rdx, %r10
  1286. adcq $0x00, %r8
  1287. # A[2] * B[14]
  1288. movq 112(%rcx), %rax
  1289. mulq 16(%rsi)
  1290. addq %rax, %r9
  1291. adcq %rdx, %r10
  1292. adcq $0x00, %r8
  1293. # A[3] * B[13]
  1294. movq 104(%rcx), %rax
  1295. mulq 24(%rsi)
  1296. addq %rax, %r9
  1297. adcq %rdx, %r10
  1298. adcq $0x00, %r8
  1299. # A[4] * B[12]
  1300. movq 96(%rcx), %rax
  1301. mulq 32(%rsi)
  1302. addq %rax, %r9
  1303. adcq %rdx, %r10
  1304. adcq $0x00, %r8
  1305. # A[5] * B[11]
  1306. movq 88(%rcx), %rax
  1307. mulq 40(%rsi)
  1308. addq %rax, %r9
  1309. adcq %rdx, %r10
  1310. adcq $0x00, %r8
  1311. # A[6] * B[10]
  1312. movq 80(%rcx), %rax
  1313. mulq 48(%rsi)
  1314. addq %rax, %r9
  1315. adcq %rdx, %r10
  1316. adcq $0x00, %r8
  1317. # A[7] * B[9]
  1318. movq 72(%rcx), %rax
  1319. mulq 56(%rsi)
  1320. addq %rax, %r9
  1321. adcq %rdx, %r10
  1322. adcq $0x00, %r8
  1323. # A[8] * B[8]
  1324. movq 64(%rcx), %rax
  1325. mulq 64(%rsi)
  1326. addq %rax, %r9
  1327. adcq %rdx, %r10
  1328. adcq $0x00, %r8
  1329. # A[9] * B[7]
  1330. movq 56(%rcx), %rax
  1331. mulq 72(%rsi)
  1332. addq %rax, %r9
  1333. adcq %rdx, %r10
  1334. adcq $0x00, %r8
  1335. # A[10] * B[6]
  1336. movq 48(%rcx), %rax
  1337. mulq 80(%rsi)
  1338. addq %rax, %r9
  1339. adcq %rdx, %r10
  1340. adcq $0x00, %r8
  1341. # A[11] * B[5]
  1342. movq 40(%rcx), %rax
  1343. mulq 88(%rsi)
  1344. addq %rax, %r9
  1345. adcq %rdx, %r10
  1346. adcq $0x00, %r8
  1347. # A[12] * B[4]
  1348. movq 32(%rcx), %rax
  1349. mulq 96(%rsi)
  1350. addq %rax, %r9
  1351. adcq %rdx, %r10
  1352. adcq $0x00, %r8
  1353. # A[13] * B[3]
  1354. movq 24(%rcx), %rax
  1355. mulq 104(%rsi)
  1356. addq %rax, %r9
  1357. adcq %rdx, %r10
  1358. adcq $0x00, %r8
  1359. # A[14] * B[2]
  1360. movq 16(%rcx), %rax
  1361. mulq 112(%rsi)
  1362. addq %rax, %r9
  1363. adcq %rdx, %r10
  1364. adcq $0x00, %r8
  1365. # A[15] * B[1]
  1366. movq 8(%rcx), %rax
  1367. mulq 120(%rsi)
  1368. addq %rax, %r9
  1369. adcq %rdx, %r10
  1370. adcq $0x00, %r8
  1371. movq %r9, 128(%rdi)
  1372. # A[2] * B[15]
  1373. movq 120(%rcx), %rax
  1374. mulq 16(%rsi)
  1375. xorq %r9, %r9
  1376. addq %rax, %r10
  1377. adcq %rdx, %r8
  1378. adcq $0x00, %r9
  1379. # A[3] * B[14]
  1380. movq 112(%rcx), %rax
  1381. mulq 24(%rsi)
  1382. addq %rax, %r10
  1383. adcq %rdx, %r8
  1384. adcq $0x00, %r9
  1385. # A[4] * B[13]
  1386. movq 104(%rcx), %rax
  1387. mulq 32(%rsi)
  1388. addq %rax, %r10
  1389. adcq %rdx, %r8
  1390. adcq $0x00, %r9
  1391. # A[5] * B[12]
  1392. movq 96(%rcx), %rax
  1393. mulq 40(%rsi)
  1394. addq %rax, %r10
  1395. adcq %rdx, %r8
  1396. adcq $0x00, %r9
  1397. # A[6] * B[11]
  1398. movq 88(%rcx), %rax
  1399. mulq 48(%rsi)
  1400. addq %rax, %r10
  1401. adcq %rdx, %r8
  1402. adcq $0x00, %r9
  1403. # A[7] * B[10]
  1404. movq 80(%rcx), %rax
  1405. mulq 56(%rsi)
  1406. addq %rax, %r10
  1407. adcq %rdx, %r8
  1408. adcq $0x00, %r9
  1409. # A[8] * B[9]
  1410. movq 72(%rcx), %rax
  1411. mulq 64(%rsi)
  1412. addq %rax, %r10
  1413. adcq %rdx, %r8
  1414. adcq $0x00, %r9
  1415. # A[9] * B[8]
  1416. movq 64(%rcx), %rax
  1417. mulq 72(%rsi)
  1418. addq %rax, %r10
  1419. adcq %rdx, %r8
  1420. adcq $0x00, %r9
  1421. # A[10] * B[7]
  1422. movq 56(%rcx), %rax
  1423. mulq 80(%rsi)
  1424. addq %rax, %r10
  1425. adcq %rdx, %r8
  1426. adcq $0x00, %r9
  1427. # A[11] * B[6]
  1428. movq 48(%rcx), %rax
  1429. mulq 88(%rsi)
  1430. addq %rax, %r10
  1431. adcq %rdx, %r8
  1432. adcq $0x00, %r9
  1433. # A[12] * B[5]
  1434. movq 40(%rcx), %rax
  1435. mulq 96(%rsi)
  1436. addq %rax, %r10
  1437. adcq %rdx, %r8
  1438. adcq $0x00, %r9
  1439. # A[13] * B[4]
  1440. movq 32(%rcx), %rax
  1441. mulq 104(%rsi)
  1442. addq %rax, %r10
  1443. adcq %rdx, %r8
  1444. adcq $0x00, %r9
  1445. # A[14] * B[3]
  1446. movq 24(%rcx), %rax
  1447. mulq 112(%rsi)
  1448. addq %rax, %r10
  1449. adcq %rdx, %r8
  1450. adcq $0x00, %r9
  1451. # A[15] * B[2]
  1452. movq 16(%rcx), %rax
  1453. mulq 120(%rsi)
  1454. addq %rax, %r10
  1455. adcq %rdx, %r8
  1456. adcq $0x00, %r9
  1457. movq %r10, 136(%rdi)
  1458. # A[3] * B[15]
  1459. movq 120(%rcx), %rax
  1460. mulq 24(%rsi)
  1461. xorq %r10, %r10
  1462. addq %rax, %r8
  1463. adcq %rdx, %r9
  1464. adcq $0x00, %r10
  1465. # A[4] * B[14]
  1466. movq 112(%rcx), %rax
  1467. mulq 32(%rsi)
  1468. addq %rax, %r8
  1469. adcq %rdx, %r9
  1470. adcq $0x00, %r10
  1471. # A[5] * B[13]
  1472. movq 104(%rcx), %rax
  1473. mulq 40(%rsi)
  1474. addq %rax, %r8
  1475. adcq %rdx, %r9
  1476. adcq $0x00, %r10
  1477. # A[6] * B[12]
  1478. movq 96(%rcx), %rax
  1479. mulq 48(%rsi)
  1480. addq %rax, %r8
  1481. adcq %rdx, %r9
  1482. adcq $0x00, %r10
  1483. # A[7] * B[11]
  1484. movq 88(%rcx), %rax
  1485. mulq 56(%rsi)
  1486. addq %rax, %r8
  1487. adcq %rdx, %r9
  1488. adcq $0x00, %r10
  1489. # A[8] * B[10]
  1490. movq 80(%rcx), %rax
  1491. mulq 64(%rsi)
  1492. addq %rax, %r8
  1493. adcq %rdx, %r9
  1494. adcq $0x00, %r10
  1495. # A[9] * B[9]
  1496. movq 72(%rcx), %rax
  1497. mulq 72(%rsi)
  1498. addq %rax, %r8
  1499. adcq %rdx, %r9
  1500. adcq $0x00, %r10
  1501. # A[10] * B[8]
  1502. movq 64(%rcx), %rax
  1503. mulq 80(%rsi)
  1504. addq %rax, %r8
  1505. adcq %rdx, %r9
  1506. adcq $0x00, %r10
  1507. # A[11] * B[7]
  1508. movq 56(%rcx), %rax
  1509. mulq 88(%rsi)
  1510. addq %rax, %r8
  1511. adcq %rdx, %r9
  1512. adcq $0x00, %r10
  1513. # A[12] * B[6]
  1514. movq 48(%rcx), %rax
  1515. mulq 96(%rsi)
  1516. addq %rax, %r8
  1517. adcq %rdx, %r9
  1518. adcq $0x00, %r10
  1519. # A[13] * B[5]
  1520. movq 40(%rcx), %rax
  1521. mulq 104(%rsi)
  1522. addq %rax, %r8
  1523. adcq %rdx, %r9
  1524. adcq $0x00, %r10
  1525. # A[14] * B[4]
  1526. movq 32(%rcx), %rax
  1527. mulq 112(%rsi)
  1528. addq %rax, %r8
  1529. adcq %rdx, %r9
  1530. adcq $0x00, %r10
  1531. # A[15] * B[3]
  1532. movq 24(%rcx), %rax
  1533. mulq 120(%rsi)
  1534. addq %rax, %r8
  1535. adcq %rdx, %r9
  1536. adcq $0x00, %r10
  1537. movq %r8, 144(%rdi)
  1538. # A[4] * B[15]
  1539. movq 120(%rcx), %rax
  1540. mulq 32(%rsi)
  1541. xorq %r8, %r8
  1542. addq %rax, %r9
  1543. adcq %rdx, %r10
  1544. adcq $0x00, %r8
  1545. # A[5] * B[14]
  1546. movq 112(%rcx), %rax
  1547. mulq 40(%rsi)
  1548. addq %rax, %r9
  1549. adcq %rdx, %r10
  1550. adcq $0x00, %r8
  1551. # A[6] * B[13]
  1552. movq 104(%rcx), %rax
  1553. mulq 48(%rsi)
  1554. addq %rax, %r9
  1555. adcq %rdx, %r10
  1556. adcq $0x00, %r8
  1557. # A[7] * B[12]
  1558. movq 96(%rcx), %rax
  1559. mulq 56(%rsi)
  1560. addq %rax, %r9
  1561. adcq %rdx, %r10
  1562. adcq $0x00, %r8
  1563. # A[8] * B[11]
  1564. movq 88(%rcx), %rax
  1565. mulq 64(%rsi)
  1566. addq %rax, %r9
  1567. adcq %rdx, %r10
  1568. adcq $0x00, %r8
  1569. # A[9] * B[10]
  1570. movq 80(%rcx), %rax
  1571. mulq 72(%rsi)
  1572. addq %rax, %r9
  1573. adcq %rdx, %r10
  1574. adcq $0x00, %r8
  1575. # A[10] * B[9]
  1576. movq 72(%rcx), %rax
  1577. mulq 80(%rsi)
  1578. addq %rax, %r9
  1579. adcq %rdx, %r10
  1580. adcq $0x00, %r8
  1581. # A[11] * B[8]
  1582. movq 64(%rcx), %rax
  1583. mulq 88(%rsi)
  1584. addq %rax, %r9
  1585. adcq %rdx, %r10
  1586. adcq $0x00, %r8
  1587. # A[12] * B[7]
  1588. movq 56(%rcx), %rax
  1589. mulq 96(%rsi)
  1590. addq %rax, %r9
  1591. adcq %rdx, %r10
  1592. adcq $0x00, %r8
  1593. # A[13] * B[6]
  1594. movq 48(%rcx), %rax
  1595. mulq 104(%rsi)
  1596. addq %rax, %r9
  1597. adcq %rdx, %r10
  1598. adcq $0x00, %r8
  1599. # A[14] * B[5]
  1600. movq 40(%rcx), %rax
  1601. mulq 112(%rsi)
  1602. addq %rax, %r9
  1603. adcq %rdx, %r10
  1604. adcq $0x00, %r8
  1605. # A[15] * B[4]
  1606. movq 32(%rcx), %rax
  1607. mulq 120(%rsi)
  1608. addq %rax, %r9
  1609. adcq %rdx, %r10
  1610. adcq $0x00, %r8
  1611. movq %r9, 152(%rdi)
  1612. # A[5] * B[15]
  1613. movq 120(%rcx), %rax
  1614. mulq 40(%rsi)
  1615. xorq %r9, %r9
  1616. addq %rax, %r10
  1617. adcq %rdx, %r8
  1618. adcq $0x00, %r9
  1619. # A[6] * B[14]
  1620. movq 112(%rcx), %rax
  1621. mulq 48(%rsi)
  1622. addq %rax, %r10
  1623. adcq %rdx, %r8
  1624. adcq $0x00, %r9
  1625. # A[7] * B[13]
  1626. movq 104(%rcx), %rax
  1627. mulq 56(%rsi)
  1628. addq %rax, %r10
  1629. adcq %rdx, %r8
  1630. adcq $0x00, %r9
  1631. # A[8] * B[12]
  1632. movq 96(%rcx), %rax
  1633. mulq 64(%rsi)
  1634. addq %rax, %r10
  1635. adcq %rdx, %r8
  1636. adcq $0x00, %r9
  1637. # A[9] * B[11]
  1638. movq 88(%rcx), %rax
  1639. mulq 72(%rsi)
  1640. addq %rax, %r10
  1641. adcq %rdx, %r8
  1642. adcq $0x00, %r9
  1643. # A[10] * B[10]
  1644. movq 80(%rcx), %rax
  1645. mulq 80(%rsi)
  1646. addq %rax, %r10
  1647. adcq %rdx, %r8
  1648. adcq $0x00, %r9
  1649. # A[11] * B[9]
  1650. movq 72(%rcx), %rax
  1651. mulq 88(%rsi)
  1652. addq %rax, %r10
  1653. adcq %rdx, %r8
  1654. adcq $0x00, %r9
  1655. # A[12] * B[8]
  1656. movq 64(%rcx), %rax
  1657. mulq 96(%rsi)
  1658. addq %rax, %r10
  1659. adcq %rdx, %r8
  1660. adcq $0x00, %r9
  1661. # A[13] * B[7]
  1662. movq 56(%rcx), %rax
  1663. mulq 104(%rsi)
  1664. addq %rax, %r10
  1665. adcq %rdx, %r8
  1666. adcq $0x00, %r9
  1667. # A[14] * B[6]
  1668. movq 48(%rcx), %rax
  1669. mulq 112(%rsi)
  1670. addq %rax, %r10
  1671. adcq %rdx, %r8
  1672. adcq $0x00, %r9
  1673. # A[15] * B[5]
  1674. movq 40(%rcx), %rax
  1675. mulq 120(%rsi)
  1676. addq %rax, %r10
  1677. adcq %rdx, %r8
  1678. adcq $0x00, %r9
  1679. movq %r10, 160(%rdi)
  1680. # A[6] * B[15]
  1681. movq 120(%rcx), %rax
  1682. mulq 48(%rsi)
  1683. xorq %r10, %r10
  1684. addq %rax, %r8
  1685. adcq %rdx, %r9
  1686. adcq $0x00, %r10
  1687. # A[7] * B[14]
  1688. movq 112(%rcx), %rax
  1689. mulq 56(%rsi)
  1690. addq %rax, %r8
  1691. adcq %rdx, %r9
  1692. adcq $0x00, %r10
  1693. # A[8] * B[13]
  1694. movq 104(%rcx), %rax
  1695. mulq 64(%rsi)
  1696. addq %rax, %r8
  1697. adcq %rdx, %r9
  1698. adcq $0x00, %r10
  1699. # A[9] * B[12]
  1700. movq 96(%rcx), %rax
  1701. mulq 72(%rsi)
  1702. addq %rax, %r8
  1703. adcq %rdx, %r9
  1704. adcq $0x00, %r10
  1705. # A[10] * B[11]
  1706. movq 88(%rcx), %rax
  1707. mulq 80(%rsi)
  1708. addq %rax, %r8
  1709. adcq %rdx, %r9
  1710. adcq $0x00, %r10
  1711. # A[11] * B[10]
  1712. movq 80(%rcx), %rax
  1713. mulq 88(%rsi)
  1714. addq %rax, %r8
  1715. adcq %rdx, %r9
  1716. adcq $0x00, %r10
  1717. # A[12] * B[9]
  1718. movq 72(%rcx), %rax
  1719. mulq 96(%rsi)
  1720. addq %rax, %r8
  1721. adcq %rdx, %r9
  1722. adcq $0x00, %r10
  1723. # A[13] * B[8]
  1724. movq 64(%rcx), %rax
  1725. mulq 104(%rsi)
  1726. addq %rax, %r8
  1727. adcq %rdx, %r9
  1728. adcq $0x00, %r10
  1729. # A[14] * B[7]
  1730. movq 56(%rcx), %rax
  1731. mulq 112(%rsi)
  1732. addq %rax, %r8
  1733. adcq %rdx, %r9
  1734. adcq $0x00, %r10
  1735. # A[15] * B[6]
  1736. movq 48(%rcx), %rax
  1737. mulq 120(%rsi)
  1738. addq %rax, %r8
  1739. adcq %rdx, %r9
  1740. adcq $0x00, %r10
  1741. movq %r8, 168(%rdi)
  1742. # A[7] * B[15]
  1743. movq 120(%rcx), %rax
  1744. mulq 56(%rsi)
  1745. xorq %r8, %r8
  1746. addq %rax, %r9
  1747. adcq %rdx, %r10
  1748. adcq $0x00, %r8
  1749. # A[8] * B[14]
  1750. movq 112(%rcx), %rax
  1751. mulq 64(%rsi)
  1752. addq %rax, %r9
  1753. adcq %rdx, %r10
  1754. adcq $0x00, %r8
  1755. # A[9] * B[13]
  1756. movq 104(%rcx), %rax
  1757. mulq 72(%rsi)
  1758. addq %rax, %r9
  1759. adcq %rdx, %r10
  1760. adcq $0x00, %r8
  1761. # A[10] * B[12]
  1762. movq 96(%rcx), %rax
  1763. mulq 80(%rsi)
  1764. addq %rax, %r9
  1765. adcq %rdx, %r10
  1766. adcq $0x00, %r8
  1767. # A[11] * B[11]
  1768. movq 88(%rcx), %rax
  1769. mulq 88(%rsi)
  1770. addq %rax, %r9
  1771. adcq %rdx, %r10
  1772. adcq $0x00, %r8
  1773. # A[12] * B[10]
  1774. movq 80(%rcx), %rax
  1775. mulq 96(%rsi)
  1776. addq %rax, %r9
  1777. adcq %rdx, %r10
  1778. adcq $0x00, %r8
  1779. # A[13] * B[9]
  1780. movq 72(%rcx), %rax
  1781. mulq 104(%rsi)
  1782. addq %rax, %r9
  1783. adcq %rdx, %r10
  1784. adcq $0x00, %r8
  1785. # A[14] * B[8]
  1786. movq 64(%rcx), %rax
  1787. mulq 112(%rsi)
  1788. addq %rax, %r9
  1789. adcq %rdx, %r10
  1790. adcq $0x00, %r8
  1791. # A[15] * B[7]
  1792. movq 56(%rcx), %rax
  1793. mulq 120(%rsi)
  1794. addq %rax, %r9
  1795. adcq %rdx, %r10
  1796. adcq $0x00, %r8
  1797. movq %r9, 176(%rdi)
  1798. # A[8] * B[15]
  1799. movq 120(%rcx), %rax
  1800. mulq 64(%rsi)
  1801. xorq %r9, %r9
  1802. addq %rax, %r10
  1803. adcq %rdx, %r8
  1804. adcq $0x00, %r9
  1805. # A[9] * B[14]
  1806. movq 112(%rcx), %rax
  1807. mulq 72(%rsi)
  1808. addq %rax, %r10
  1809. adcq %rdx, %r8
  1810. adcq $0x00, %r9
  1811. # A[10] * B[13]
  1812. movq 104(%rcx), %rax
  1813. mulq 80(%rsi)
  1814. addq %rax, %r10
  1815. adcq %rdx, %r8
  1816. adcq $0x00, %r9
  1817. # A[11] * B[12]
  1818. movq 96(%rcx), %rax
  1819. mulq 88(%rsi)
  1820. addq %rax, %r10
  1821. adcq %rdx, %r8
  1822. adcq $0x00, %r9
  1823. # A[12] * B[11]
  1824. movq 88(%rcx), %rax
  1825. mulq 96(%rsi)
  1826. addq %rax, %r10
  1827. adcq %rdx, %r8
  1828. adcq $0x00, %r9
  1829. # A[13] * B[10]
  1830. movq 80(%rcx), %rax
  1831. mulq 104(%rsi)
  1832. addq %rax, %r10
  1833. adcq %rdx, %r8
  1834. adcq $0x00, %r9
  1835. # A[14] * B[9]
  1836. movq 72(%rcx), %rax
  1837. mulq 112(%rsi)
  1838. addq %rax, %r10
  1839. adcq %rdx, %r8
  1840. adcq $0x00, %r9
  1841. # A[15] * B[8]
  1842. movq 64(%rcx), %rax
  1843. mulq 120(%rsi)
  1844. addq %rax, %r10
  1845. adcq %rdx, %r8
  1846. adcq $0x00, %r9
  1847. movq %r10, 184(%rdi)
  1848. # A[9] * B[15]
  1849. movq 120(%rcx), %rax
  1850. mulq 72(%rsi)
  1851. xorq %r10, %r10
  1852. addq %rax, %r8
  1853. adcq %rdx, %r9
  1854. adcq $0x00, %r10
  1855. # A[10] * B[14]
  1856. movq 112(%rcx), %rax
  1857. mulq 80(%rsi)
  1858. addq %rax, %r8
  1859. adcq %rdx, %r9
  1860. adcq $0x00, %r10
  1861. # A[11] * B[13]
  1862. movq 104(%rcx), %rax
  1863. mulq 88(%rsi)
  1864. addq %rax, %r8
  1865. adcq %rdx, %r9
  1866. adcq $0x00, %r10
  1867. # A[12] * B[12]
  1868. movq 96(%rcx), %rax
  1869. mulq 96(%rsi)
  1870. addq %rax, %r8
  1871. adcq %rdx, %r9
  1872. adcq $0x00, %r10
  1873. # A[13] * B[11]
  1874. movq 88(%rcx), %rax
  1875. mulq 104(%rsi)
  1876. addq %rax, %r8
  1877. adcq %rdx, %r9
  1878. adcq $0x00, %r10
  1879. # A[14] * B[10]
  1880. movq 80(%rcx), %rax
  1881. mulq 112(%rsi)
  1882. addq %rax, %r8
  1883. adcq %rdx, %r9
  1884. adcq $0x00, %r10
  1885. # A[15] * B[9]
  1886. movq 72(%rcx), %rax
  1887. mulq 120(%rsi)
  1888. addq %rax, %r8
  1889. adcq %rdx, %r9
  1890. adcq $0x00, %r10
  1891. movq %r8, 192(%rdi)
  1892. # A[10] * B[15]
  1893. movq 120(%rcx), %rax
  1894. mulq 80(%rsi)
  1895. xorq %r8, %r8
  1896. addq %rax, %r9
  1897. adcq %rdx, %r10
  1898. adcq $0x00, %r8
  1899. # A[11] * B[14]
  1900. movq 112(%rcx), %rax
  1901. mulq 88(%rsi)
  1902. addq %rax, %r9
  1903. adcq %rdx, %r10
  1904. adcq $0x00, %r8
  1905. # A[12] * B[13]
  1906. movq 104(%rcx), %rax
  1907. mulq 96(%rsi)
  1908. addq %rax, %r9
  1909. adcq %rdx, %r10
  1910. adcq $0x00, %r8
  1911. # A[13] * B[12]
  1912. movq 96(%rcx), %rax
  1913. mulq 104(%rsi)
  1914. addq %rax, %r9
  1915. adcq %rdx, %r10
  1916. adcq $0x00, %r8
  1917. # A[14] * B[11]
  1918. movq 88(%rcx), %rax
  1919. mulq 112(%rsi)
  1920. addq %rax, %r9
  1921. adcq %rdx, %r10
  1922. adcq $0x00, %r8
  1923. # A[15] * B[10]
  1924. movq 80(%rcx), %rax
  1925. mulq 120(%rsi)
  1926. addq %rax, %r9
  1927. adcq %rdx, %r10
  1928. adcq $0x00, %r8
  1929. movq %r9, 200(%rdi)
  1930. # A[11] * B[15]
  1931. movq 120(%rcx), %rax
  1932. mulq 88(%rsi)
  1933. xorq %r9, %r9
  1934. addq %rax, %r10
  1935. adcq %rdx, %r8
  1936. adcq $0x00, %r9
  1937. # A[12] * B[14]
  1938. movq 112(%rcx), %rax
  1939. mulq 96(%rsi)
  1940. addq %rax, %r10
  1941. adcq %rdx, %r8
  1942. adcq $0x00, %r9
  1943. # A[13] * B[13]
  1944. movq 104(%rcx), %rax
  1945. mulq 104(%rsi)
  1946. addq %rax, %r10
  1947. adcq %rdx, %r8
  1948. adcq $0x00, %r9
  1949. # A[14] * B[12]
  1950. movq 96(%rcx), %rax
  1951. mulq 112(%rsi)
  1952. addq %rax, %r10
  1953. adcq %rdx, %r8
  1954. adcq $0x00, %r9
  1955. # A[15] * B[11]
  1956. movq 88(%rcx), %rax
  1957. mulq 120(%rsi)
  1958. addq %rax, %r10
  1959. adcq %rdx, %r8
  1960. adcq $0x00, %r9
  1961. movq %r10, 208(%rdi)
  1962. # A[12] * B[15]
  1963. movq 120(%rcx), %rax
  1964. mulq 96(%rsi)
  1965. xorq %r10, %r10
  1966. addq %rax, %r8
  1967. adcq %rdx, %r9
  1968. adcq $0x00, %r10
  1969. # A[13] * B[14]
  1970. movq 112(%rcx), %rax
  1971. mulq 104(%rsi)
  1972. addq %rax, %r8
  1973. adcq %rdx, %r9
  1974. adcq $0x00, %r10
  1975. # A[14] * B[13]
  1976. movq 104(%rcx), %rax
  1977. mulq 112(%rsi)
  1978. addq %rax, %r8
  1979. adcq %rdx, %r9
  1980. adcq $0x00, %r10
  1981. # A[15] * B[12]
  1982. movq 96(%rcx), %rax
  1983. mulq 120(%rsi)
  1984. addq %rax, %r8
  1985. adcq %rdx, %r9
  1986. adcq $0x00, %r10
  1987. movq %r8, 216(%rdi)
  1988. # A[13] * B[15]
  1989. movq 120(%rcx), %rax
  1990. mulq 104(%rsi)
  1991. xorq %r8, %r8
  1992. addq %rax, %r9
  1993. adcq %rdx, %r10
  1994. adcq $0x00, %r8
  1995. # A[14] * B[14]
  1996. movq 112(%rcx), %rax
  1997. mulq 112(%rsi)
  1998. addq %rax, %r9
  1999. adcq %rdx, %r10
  2000. adcq $0x00, %r8
  2001. # A[15] * B[13]
  2002. movq 104(%rcx), %rax
  2003. mulq 120(%rsi)
  2004. addq %rax, %r9
  2005. adcq %rdx, %r10
  2006. adcq $0x00, %r8
  2007. movq %r9, 224(%rdi)
  2008. # A[14] * B[15]
  2009. movq 120(%rcx), %rax
  2010. mulq 112(%rsi)
  2011. xorq %r9, %r9
  2012. addq %rax, %r10
  2013. adcq %rdx, %r8
  2014. adcq $0x00, %r9
  2015. # A[15] * B[14]
  2016. movq 112(%rcx), %rax
  2017. mulq 120(%rsi)
  2018. addq %rax, %r10
  2019. adcq %rdx, %r8
  2020. adcq $0x00, %r9
  2021. movq %r10, 232(%rdi)
  2022. # A[15] * B[15]
  2023. movq 120(%rcx), %rax
  2024. mulq 120(%rsi)
  2025. addq %rax, %r8
  2026. adcq %rdx, %r9
  2027. movq %r8, 240(%rdi)
  2028. movq %r9, 248(%rdi)
  2029. movq (%rsp), %rax
  2030. movq 8(%rsp), %rdx
  2031. movq 16(%rsp), %r8
  2032. movq 24(%rsp), %r9
  2033. movq %rax, (%rdi)
  2034. movq %rdx, 8(%rdi)
  2035. movq %r8, 16(%rdi)
  2036. movq %r9, 24(%rdi)
  2037. movq 32(%rsp), %rax
  2038. movq 40(%rsp), %rdx
  2039. movq 48(%rsp), %r8
  2040. movq 56(%rsp), %r9
  2041. movq %rax, 32(%rdi)
  2042. movq %rdx, 40(%rdi)
  2043. movq %r8, 48(%rdi)
  2044. movq %r9, 56(%rdi)
  2045. movq 64(%rsp), %rax
  2046. movq 72(%rsp), %rdx
  2047. movq 80(%rsp), %r8
  2048. movq 88(%rsp), %r9
  2049. movq %rax, 64(%rdi)
  2050. movq %rdx, 72(%rdi)
  2051. movq %r8, 80(%rdi)
  2052. movq %r9, 88(%rdi)
  2053. movq 96(%rsp), %rax
  2054. movq 104(%rsp), %rdx
  2055. movq 112(%rsp), %r8
  2056. movq 120(%rsp), %r9
  2057. movq %rax, 96(%rdi)
  2058. movq %rdx, 104(%rdi)
  2059. movq %r8, 112(%rdi)
  2060. movq %r9, 120(%rdi)
  2061. addq $0x80, %rsp
  2062. repz retq
  2063. #ifndef __APPLE__
  2064. .size sp_2048_mul_16,.-sp_2048_mul_16
  2065. #endif /* __APPLE__ */
  2066. /* Square a and put result in r. (r = a * a)
  2067. *
  2068. * r A single precision integer.
  2069. * a A single precision integer.
        *
        * r is addressed through %rdi and a through %rsi. Sixteen 64-bit words
        * are read from a (offsets 0..120) and thirty-two 64-bit words are
        * written to r (offsets 0..248).
        *
        * Result word k ("column k") is the sum of every A[i] * A[j] with
        * i + j == k: products with i != j are counted twice, A[i] * A[i] once.
        * The running three-word accumulator rotates through %r8, %r9 and %rcx.
        * Columns 5..25 first sum their cross products in %r10, %r11, %r12
        * (low to high), double that sum once, add the diagonal square if the
        * column has one, and only then fold it into the accumulator; the other
        * columns add each cross product to the accumulator twice.
        *
        * Words 0..15 are staged in a 128-byte stack buffer and copied to r
        * after the last read of a; words 16..31 are stored straight to r.
        * NOTE(review): presumably this allows r and a to be the same buffer -
        * confirm against callers.
        *
        * Overwrites %rax, %rcx, %rdx, %r8-%r11 and the flags; %r12 is saved
        * and restored. Uses 136 bytes of stack (8 for %r12 plus the buffer).
  2070. */
  2071. #ifndef __APPLE__
  2072. .text
  2073. .globl sp_2048_sqr_16
  2074. .type sp_2048_sqr_16,@function
  2075. .align 16
  2076. sp_2048_sqr_16:
  2077. #else
  2078. .section __TEXT,__text
  2079. .globl _sp_2048_sqr_16
  2080. .p2align 4
  2081. _sp_2048_sqr_16:
  2082. #endif /* __APPLE__ */
        # Save %r12, which is used as scratch below. It is callee-saved in the
        # System V AMD64 ABI, which the %rdi/%rsi argument usage suggests (confirm).
  2083. pushq %r12
        # Reserve the 128-byte buffer that holds result words 0..15.
  2084. subq $0x80, %rsp
        # Column 0: low word goes to the buffer, high word seeds the accumulator.
  2085. # A[0] * A[0]
  2086. movq (%rsi), %rax
  2087. mulq %rax
  2088. xorq %r9, %r9
  2089. movq %rax, (%rsp)
  2090. movq %rdx, %r8
        # Column 1 (columns 1..4 add each cross product to the accumulator twice).
  2091. # A[0] * A[1]
  2092. movq 8(%rsi), %rax
  2093. mulq (%rsi)
  2094. xorq %rcx, %rcx
  2095. addq %rax, %r8
  2096. adcq %rdx, %r9
  2097. adcq $0x00, %rcx
  2098. addq %rax, %r8
  2099. adcq %rdx, %r9
  2100. adcq $0x00, %rcx
  2101. movq %r8, 8(%rsp)
        # Column 2
  2102. # A[0] * A[2]
  2103. movq 16(%rsi), %rax
  2104. mulq (%rsi)
  2105. xorq %r8, %r8
  2106. addq %rax, %r9
  2107. adcq %rdx, %rcx
  2108. adcq $0x00, %r8
  2109. addq %rax, %r9
  2110. adcq %rdx, %rcx
  2111. adcq $0x00, %r8
  2112. # A[1] * A[1]
  2113. movq 8(%rsi), %rax
  2114. mulq %rax
  2115. addq %rax, %r9
  2116. adcq %rdx, %rcx
  2117. adcq $0x00, %r8
  2118. movq %r9, 16(%rsp)
        # Column 3
  2119. # A[0] * A[3]
  2120. movq 24(%rsi), %rax
  2121. mulq (%rsi)
  2122. xorq %r9, %r9
  2123. addq %rax, %rcx
  2124. adcq %rdx, %r8
  2125. adcq $0x00, %r9
  2126. addq %rax, %rcx
  2127. adcq %rdx, %r8
  2128. adcq $0x00, %r9
  2129. # A[1] * A[2]
  2130. movq 16(%rsi), %rax
  2131. mulq 8(%rsi)
  2132. addq %rax, %rcx
  2133. adcq %rdx, %r8
  2134. adcq $0x00, %r9
  2135. addq %rax, %rcx
  2136. adcq %rdx, %r8
  2137. adcq $0x00, %r9
  2138. movq %rcx, 24(%rsp)
        # Column 4
  2139. # A[0] * A[4]
  2140. movq 32(%rsi), %rax
  2141. mulq (%rsi)
  2142. xorq %rcx, %rcx
  2143. addq %rax, %r8
  2144. adcq %rdx, %r9
  2145. adcq $0x00, %rcx
  2146. addq %rax, %r8
  2147. adcq %rdx, %r9
  2148. adcq $0x00, %rcx
  2149. # A[1] * A[3]
  2150. movq 24(%rsi), %rax
  2151. mulq 8(%rsi)
  2152. addq %rax, %r8
  2153. adcq %rdx, %r9
  2154. adcq $0x00, %rcx
  2155. addq %rax, %r8
  2156. adcq %rdx, %r9
  2157. adcq $0x00, %rcx
  2158. # A[2] * A[2]
  2159. movq 16(%rsi), %rax
  2160. mulq %rax
  2161. addq %rax, %r8
  2162. adcq %rdx, %r9
  2163. adcq $0x00, %rcx
  2164. movq %r8, 32(%rsp)
        # Column 5 (columns 5..25 sum the cross products in %r10, %r11, %r12
        # and double that sum once before adding it to the accumulator).
  2165. # A[0] * A[5]
  2166. movq 40(%rsi), %rax
  2167. mulq (%rsi)
  2168. xorq %r8, %r8
  2169. xorq %r12, %r12
  2170. movq %rax, %r10
  2171. movq %rdx, %r11
  2172. # A[1] * A[4]
  2173. movq 32(%rsi), %rax
  2174. mulq 8(%rsi)
  2175. addq %rax, %r10
  2176. adcq %rdx, %r11
  2177. adcq $0x00, %r12
  2178. # A[2] * A[3]
  2179. movq 24(%rsi), %rax
  2180. mulq 16(%rsi)
  2181. addq %rax, %r10
  2182. adcq %rdx, %r11
  2183. adcq $0x00, %r12
  2184. addq %r10, %r10
  2185. adcq %r11, %r11
  2186. adcq %r12, %r12
  2187. addq %r10, %r9
  2188. adcq %r11, %rcx
  2189. adcq %r12, %r8
  2190. movq %r9, 40(%rsp)
        # Column 6
  2191. # A[0] * A[6]
  2192. movq 48(%rsi), %rax
  2193. mulq (%rsi)
  2194. xorq %r9, %r9
  2195. xorq %r12, %r12
  2196. movq %rax, %r10
  2197. movq %rdx, %r11
  2198. # A[1] * A[5]
  2199. movq 40(%rsi), %rax
  2200. mulq 8(%rsi)
  2201. addq %rax, %r10
  2202. adcq %rdx, %r11
  2203. adcq $0x00, %r12
  2204. # A[2] * A[4]
  2205. movq 32(%rsi), %rax
  2206. mulq 16(%rsi)
  2207. addq %rax, %r10
  2208. adcq %rdx, %r11
  2209. adcq $0x00, %r12
  2210. # A[3] * A[3]
  2211. movq 24(%rsi), %rax
  2212. mulq %rax
  2213. addq %r10, %r10
  2214. adcq %r11, %r11
  2215. adcq %r12, %r12
  2216. addq %rax, %r10
  2217. adcq %rdx, %r11
  2218. adcq $0x00, %r12
  2219. addq %r10, %rcx
  2220. adcq %r11, %r8
  2221. adcq %r12, %r9
  2222. movq %rcx, 48(%rsp)
        # Column 7
  2223. # A[0] * A[7]
  2224. movq 56(%rsi), %rax
  2225. mulq (%rsi)
  2226. xorq %rcx, %rcx
  2227. xorq %r12, %r12
  2228. movq %rax, %r10
  2229. movq %rdx, %r11
  2230. # A[1] * A[6]
  2231. movq 48(%rsi), %rax
  2232. mulq 8(%rsi)
  2233. addq %rax, %r10
  2234. adcq %rdx, %r11
  2235. adcq $0x00, %r12
  2236. # A[2] * A[5]
  2237. movq 40(%rsi), %rax
  2238. mulq 16(%rsi)
  2239. addq %rax, %r10
  2240. adcq %rdx, %r11
  2241. adcq $0x00, %r12
  2242. # A[3] * A[4]
  2243. movq 32(%rsi), %rax
  2244. mulq 24(%rsi)
  2245. addq %rax, %r10
  2246. adcq %rdx, %r11
  2247. adcq $0x00, %r12
  2248. addq %r10, %r10
  2249. adcq %r11, %r11
  2250. adcq %r12, %r12
  2251. addq %r10, %r8
  2252. adcq %r11, %r9
  2253. adcq %r12, %rcx
  2254. movq %r8, 56(%rsp)
        # Column 8
  2255. # A[0] * A[8]
  2256. movq 64(%rsi), %rax
  2257. mulq (%rsi)
  2258. xorq %r8, %r8
  2259. xorq %r12, %r12
  2260. movq %rax, %r10
  2261. movq %rdx, %r11
  2262. # A[1] * A[7]
  2263. movq 56(%rsi), %rax
  2264. mulq 8(%rsi)
  2265. addq %rax, %r10
  2266. adcq %rdx, %r11
  2267. adcq $0x00, %r12
  2268. # A[2] * A[6]
  2269. movq 48(%rsi), %rax
  2270. mulq 16(%rsi)
  2271. addq %rax, %r10
  2272. adcq %rdx, %r11
  2273. adcq $0x00, %r12
  2274. # A[3] * A[5]
  2275. movq 40(%rsi), %rax
  2276. mulq 24(%rsi)
  2277. addq %rax, %r10
  2278. adcq %rdx, %r11
  2279. adcq $0x00, %r12
  2280. # A[4] * A[4]
  2281. movq 32(%rsi), %rax
  2282. mulq %rax
  2283. addq %r10, %r10
  2284. adcq %r11, %r11
  2285. adcq %r12, %r12
  2286. addq %rax, %r10
  2287. adcq %rdx, %r11
  2288. adcq $0x00, %r12
  2289. addq %r10, %r9
  2290. adcq %r11, %rcx
  2291. adcq %r12, %r8
  2292. movq %r9, 64(%rsp)
        # Column 9
  2293. # A[0] * A[9]
  2294. movq 72(%rsi), %rax
  2295. mulq (%rsi)
  2296. xorq %r9, %r9
  2297. xorq %r12, %r12
  2298. movq %rax, %r10
  2299. movq %rdx, %r11
  2300. # A[1] * A[8]
  2301. movq 64(%rsi), %rax
  2302. mulq 8(%rsi)
  2303. addq %rax, %r10
  2304. adcq %rdx, %r11
  2305. adcq $0x00, %r12
  2306. # A[2] * A[7]
  2307. movq 56(%rsi), %rax
  2308. mulq 16(%rsi)
  2309. addq %rax, %r10
  2310. adcq %rdx, %r11
  2311. adcq $0x00, %r12
  2312. # A[3] * A[6]
  2313. movq 48(%rsi), %rax
  2314. mulq 24(%rsi)
  2315. addq %rax, %r10
  2316. adcq %rdx, %r11
  2317. adcq $0x00, %r12
  2318. # A[4] * A[5]
  2319. movq 40(%rsi), %rax
  2320. mulq 32(%rsi)
  2321. addq %rax, %r10
  2322. adcq %rdx, %r11
  2323. adcq $0x00, %r12
  2324. addq %r10, %r10
  2325. adcq %r11, %r11
  2326. adcq %r12, %r12
  2327. addq %r10, %rcx
  2328. adcq %r11, %r8
  2329. adcq %r12, %r9
  2330. movq %rcx, 72(%rsp)
        # Column 10
  2331. # A[0] * A[10]
  2332. movq 80(%rsi), %rax
  2333. mulq (%rsi)
  2334. xorq %rcx, %rcx
  2335. xorq %r12, %r12
  2336. movq %rax, %r10
  2337. movq %rdx, %r11
  2338. # A[1] * A[9]
  2339. movq 72(%rsi), %rax
  2340. mulq 8(%rsi)
  2341. addq %rax, %r10
  2342. adcq %rdx, %r11
  2343. adcq $0x00, %r12
  2344. # A[2] * A[8]
  2345. movq 64(%rsi), %rax
  2346. mulq 16(%rsi)
  2347. addq %rax, %r10
  2348. adcq %rdx, %r11
  2349. adcq $0x00, %r12
  2350. # A[3] * A[7]
  2351. movq 56(%rsi), %rax
  2352. mulq 24(%rsi)
  2353. addq %rax, %r10
  2354. adcq %rdx, %r11
  2355. adcq $0x00, %r12
  2356. # A[4] * A[6]
  2357. movq 48(%rsi), %rax
  2358. mulq 32(%rsi)
  2359. addq %rax, %r10
  2360. adcq %rdx, %r11
  2361. adcq $0x00, %r12
  2362. # A[5] * A[5]
  2363. movq 40(%rsi), %rax
  2364. mulq %rax
  2365. addq %r10, %r10
  2366. adcq %r11, %r11
  2367. adcq %r12, %r12
  2368. addq %rax, %r10
  2369. adcq %rdx, %r11
  2370. adcq $0x00, %r12
  2371. addq %r10, %r8
  2372. adcq %r11, %r9
  2373. adcq %r12, %rcx
  2374. movq %r8, 80(%rsp)
        # Column 11
  2375. # A[0] * A[11]
  2376. movq 88(%rsi), %rax
  2377. mulq (%rsi)
  2378. xorq %r8, %r8
  2379. xorq %r12, %r12
  2380. movq %rax, %r10
  2381. movq %rdx, %r11
  2382. # A[1] * A[10]
  2383. movq 80(%rsi), %rax
  2384. mulq 8(%rsi)
  2385. addq %rax, %r10
  2386. adcq %rdx, %r11
  2387. adcq $0x00, %r12
  2388. # A[2] * A[9]
  2389. movq 72(%rsi), %rax
  2390. mulq 16(%rsi)
  2391. addq %rax, %r10
  2392. adcq %rdx, %r11
  2393. adcq $0x00, %r12
  2394. # A[3] * A[8]
  2395. movq 64(%rsi), %rax
  2396. mulq 24(%rsi)
  2397. addq %rax, %r10
  2398. adcq %rdx, %r11
  2399. adcq $0x00, %r12
  2400. # A[4] * A[7]
  2401. movq 56(%rsi), %rax
  2402. mulq 32(%rsi)
  2403. addq %rax, %r10
  2404. adcq %rdx, %r11
  2405. adcq $0x00, %r12
  2406. # A[5] * A[6]
  2407. movq 48(%rsi), %rax
  2408. mulq 40(%rsi)
  2409. addq %rax, %r10
  2410. adcq %rdx, %r11
  2411. adcq $0x00, %r12
  2412. addq %r10, %r10
  2413. adcq %r11, %r11
  2414. adcq %r12, %r12
  2415. addq %r10, %r9
  2416. adcq %r11, %rcx
  2417. adcq %r12, %r8
  2418. movq %r9, 88(%rsp)
        # Column 12
  2419. # A[0] * A[12]
  2420. movq 96(%rsi), %rax
  2421. mulq (%rsi)
  2422. xorq %r9, %r9
  2423. xorq %r12, %r12
  2424. movq %rax, %r10
  2425. movq %rdx, %r11
  2426. # A[1] * A[11]
  2427. movq 88(%rsi), %rax
  2428. mulq 8(%rsi)
  2429. addq %rax, %r10
  2430. adcq %rdx, %r11
  2431. adcq $0x00, %r12
  2432. # A[2] * A[10]
  2433. movq 80(%rsi), %rax
  2434. mulq 16(%rsi)
  2435. addq %rax, %r10
  2436. adcq %rdx, %r11
  2437. adcq $0x00, %r12
  2438. # A[3] * A[9]
  2439. movq 72(%rsi), %rax
  2440. mulq 24(%rsi)
  2441. addq %rax, %r10
  2442. adcq %rdx, %r11
  2443. adcq $0x00, %r12
  2444. # A[4] * A[8]
  2445. movq 64(%rsi), %rax
  2446. mulq 32(%rsi)
  2447. addq %rax, %r10
  2448. adcq %rdx, %r11
  2449. adcq $0x00, %r12
  2450. # A[5] * A[7]
  2451. movq 56(%rsi), %rax
  2452. mulq 40(%rsi)
  2453. addq %rax, %r10
  2454. adcq %rdx, %r11
  2455. adcq $0x00, %r12
  2456. # A[6] * A[6]
  2457. movq 48(%rsi), %rax
  2458. mulq %rax
  2459. addq %r10, %r10
  2460. adcq %r11, %r11
  2461. adcq %r12, %r12
  2462. addq %rax, %r10
  2463. adcq %rdx, %r11
  2464. adcq $0x00, %r12
  2465. addq %r10, %rcx
  2466. adcq %r11, %r8
  2467. adcq %r12, %r9
  2468. movq %rcx, 96(%rsp)
        # Column 13
  2469. # A[0] * A[13]
  2470. movq 104(%rsi), %rax
  2471. mulq (%rsi)
  2472. xorq %rcx, %rcx
  2473. xorq %r12, %r12
  2474. movq %rax, %r10
  2475. movq %rdx, %r11
  2476. # A[1] * A[12]
  2477. movq 96(%rsi), %rax
  2478. mulq 8(%rsi)
  2479. addq %rax, %r10
  2480. adcq %rdx, %r11
  2481. adcq $0x00, %r12
  2482. # A[2] * A[11]
  2483. movq 88(%rsi), %rax
  2484. mulq 16(%rsi)
  2485. addq %rax, %r10
  2486. adcq %rdx, %r11
  2487. adcq $0x00, %r12
  2488. # A[3] * A[10]
  2489. movq 80(%rsi), %rax
  2490. mulq 24(%rsi)
  2491. addq %rax, %r10
  2492. adcq %rdx, %r11
  2493. adcq $0x00, %r12
  2494. # A[4] * A[9]
  2495. movq 72(%rsi), %rax
  2496. mulq 32(%rsi)
  2497. addq %rax, %r10
  2498. adcq %rdx, %r11
  2499. adcq $0x00, %r12
  2500. # A[5] * A[8]
  2501. movq 64(%rsi), %rax
  2502. mulq 40(%rsi)
  2503. addq %rax, %r10
  2504. adcq %rdx, %r11
  2505. adcq $0x00, %r12
  2506. # A[6] * A[7]
  2507. movq 56(%rsi), %rax
  2508. mulq 48(%rsi)
  2509. addq %rax, %r10
  2510. adcq %rdx, %r11
  2511. adcq $0x00, %r12
  2512. addq %r10, %r10
  2513. adcq %r11, %r11
  2514. adcq %r12, %r12
  2515. addq %r10, %r8
  2516. adcq %r11, %r9
  2517. adcq %r12, %rcx
  2518. movq %r8, 104(%rsp)
        # Column 14
  2519. # A[0] * A[14]
  2520. movq 112(%rsi), %rax
  2521. mulq (%rsi)
  2522. xorq %r8, %r8
  2523. xorq %r12, %r12
  2524. movq %rax, %r10
  2525. movq %rdx, %r11
  2526. # A[1] * A[13]
  2527. movq 104(%rsi), %rax
  2528. mulq 8(%rsi)
  2529. addq %rax, %r10
  2530. adcq %rdx, %r11
  2531. adcq $0x00, %r12
  2532. # A[2] * A[12]
  2533. movq 96(%rsi), %rax
  2534. mulq 16(%rsi)
  2535. addq %rax, %r10
  2536. adcq %rdx, %r11
  2537. adcq $0x00, %r12
  2538. # A[3] * A[11]
  2539. movq 88(%rsi), %rax
  2540. mulq 24(%rsi)
  2541. addq %rax, %r10
  2542. adcq %rdx, %r11
  2543. adcq $0x00, %r12
  2544. # A[4] * A[10]
  2545. movq 80(%rsi), %rax
  2546. mulq 32(%rsi)
  2547. addq %rax, %r10
  2548. adcq %rdx, %r11
  2549. adcq $0x00, %r12
  2550. # A[5] * A[9]
  2551. movq 72(%rsi), %rax
  2552. mulq 40(%rsi)
  2553. addq %rax, %r10
  2554. adcq %rdx, %r11
  2555. adcq $0x00, %r12
  2556. # A[6] * A[8]
  2557. movq 64(%rsi), %rax
  2558. mulq 48(%rsi)
  2559. addq %rax, %r10
  2560. adcq %rdx, %r11
  2561. adcq $0x00, %r12
  2562. # A[7] * A[7]
  2563. movq 56(%rsi), %rax
  2564. mulq %rax
  2565. addq %r10, %r10
  2566. adcq %r11, %r11
  2567. adcq %r12, %r12
  2568. addq %rax, %r10
  2569. adcq %rdx, %r11
  2570. adcq $0x00, %r12
  2571. addq %r10, %r9
  2572. adcq %r11, %rcx
  2573. adcq %r12, %r8
  2574. movq %r9, 112(%rsp)
        # Column 15
  2575. # A[0] * A[15]
  2576. movq 120(%rsi), %rax
  2577. mulq (%rsi)
  2578. xorq %r9, %r9
  2579. xorq %r12, %r12
  2580. movq %rax, %r10
  2581. movq %rdx, %r11
  2582. # A[1] * A[14]
  2583. movq 112(%rsi), %rax
  2584. mulq 8(%rsi)
  2585. addq %rax, %r10
  2586. adcq %rdx, %r11
  2587. adcq $0x00, %r12
  2588. # A[2] * A[13]
  2589. movq 104(%rsi), %rax
  2590. mulq 16(%rsi)
  2591. addq %rax, %r10
  2592. adcq %rdx, %r11
  2593. adcq $0x00, %r12
  2594. # A[3] * A[12]
  2595. movq 96(%rsi), %rax
  2596. mulq 24(%rsi)
  2597. addq %rax, %r10
  2598. adcq %rdx, %r11
  2599. adcq $0x00, %r12
  2600. # A[4] * A[11]
  2601. movq 88(%rsi), %rax
  2602. mulq 32(%rsi)
  2603. addq %rax, %r10
  2604. adcq %rdx, %r11
  2605. adcq $0x00, %r12
  2606. # A[5] * A[10]
  2607. movq 80(%rsi), %rax
  2608. mulq 40(%rsi)
  2609. addq %rax, %r10
  2610. adcq %rdx, %r11
  2611. adcq $0x00, %r12
  2612. # A[6] * A[9]
  2613. movq 72(%rsi), %rax
  2614. mulq 48(%rsi)
  2615. addq %rax, %r10
  2616. adcq %rdx, %r11
  2617. adcq $0x00, %r12
  2618. # A[7] * A[8]
  2619. movq 64(%rsi), %rax
  2620. mulq 56(%rsi)
  2621. addq %rax, %r10
  2622. adcq %rdx, %r11
  2623. adcq $0x00, %r12
  2624. addq %r10, %r10
  2625. adcq %r11, %r11
  2626. adcq %r12, %r12
  2627. addq %r10, %rcx
  2628. adcq %r11, %r8
  2629. adcq %r12, %r9
  2630. movq %rcx, 120(%rsp)
        # Column 16 (this and every later column is stored directly to r
        # instead of the stack buffer).
  2631. # A[1] * A[15]
  2632. movq 120(%rsi), %rax
  2633. mulq 8(%rsi)
  2634. xorq %rcx, %rcx
  2635. xorq %r12, %r12
  2636. movq %rax, %r10
  2637. movq %rdx, %r11
  2638. # A[2] * A[14]
  2639. movq 112(%rsi), %rax
  2640. mulq 16(%rsi)
  2641. addq %rax, %r10
  2642. adcq %rdx, %r11
  2643. adcq $0x00, %r12
  2644. # A[3] * A[13]
  2645. movq 104(%rsi), %rax
  2646. mulq 24(%rsi)
  2647. addq %rax, %r10
  2648. adcq %rdx, %r11
  2649. adcq $0x00, %r12
  2650. # A[4] * A[12]
  2651. movq 96(%rsi), %rax
  2652. mulq 32(%rsi)
  2653. addq %rax, %r10
  2654. adcq %rdx, %r11
  2655. adcq $0x00, %r12
  2656. # A[5] * A[11]
  2657. movq 88(%rsi), %rax
  2658. mulq 40(%rsi)
  2659. addq %rax, %r10
  2660. adcq %rdx, %r11
  2661. adcq $0x00, %r12
  2662. # A[6] * A[10]
  2663. movq 80(%rsi), %rax
  2664. mulq 48(%rsi)
  2665. addq %rax, %r10
  2666. adcq %rdx, %r11
  2667. adcq $0x00, %r12
  2668. # A[7] * A[9]
  2669. movq 72(%rsi), %rax
  2670. mulq 56(%rsi)
  2671. addq %rax, %r10
  2672. adcq %rdx, %r11
  2673. adcq $0x00, %r12
  2674. # A[8] * A[8]
  2675. movq 64(%rsi), %rax
  2676. mulq %rax
  2677. addq %r10, %r10
  2678. adcq %r11, %r11
  2679. adcq %r12, %r12
  2680. addq %rax, %r10
  2681. adcq %rdx, %r11
  2682. adcq $0x00, %r12
  2683. addq %r10, %r8
  2684. adcq %r11, %r9
  2685. adcq %r12, %rcx
  2686. movq %r8, 128(%rdi)
        # Column 17
  2687. # A[2] * A[15]
  2688. movq 120(%rsi), %rax
  2689. mulq 16(%rsi)
  2690. xorq %r8, %r8
  2691. xorq %r12, %r12
  2692. movq %rax, %r10
  2693. movq %rdx, %r11
  2694. # A[3] * A[14]
  2695. movq 112(%rsi), %rax
  2696. mulq 24(%rsi)
  2697. addq %rax, %r10
  2698. adcq %rdx, %r11
  2699. adcq $0x00, %r12
  2700. # A[4] * A[13]
  2701. movq 104(%rsi), %rax
  2702. mulq 32(%rsi)
  2703. addq %rax, %r10
  2704. adcq %rdx, %r11
  2705. adcq $0x00, %r12
  2706. # A[5] * A[12]
  2707. movq 96(%rsi), %rax
  2708. mulq 40(%rsi)
  2709. addq %rax, %r10
  2710. adcq %rdx, %r11
  2711. adcq $0x00, %r12
  2712. # A[6] * A[11]
  2713. movq 88(%rsi), %rax
  2714. mulq 48(%rsi)
  2715. addq %rax, %r10
  2716. adcq %rdx, %r11
  2717. adcq $0x00, %r12
  2718. # A[7] * A[10]
  2719. movq 80(%rsi), %rax
  2720. mulq 56(%rsi)
  2721. addq %rax, %r10
  2722. adcq %rdx, %r11
  2723. adcq $0x00, %r12
  2724. # A[8] * A[9]
  2725. movq 72(%rsi), %rax
  2726. mulq 64(%rsi)
  2727. addq %rax, %r10
  2728. adcq %rdx, %r11
  2729. adcq $0x00, %r12
  2730. addq %r10, %r10
  2731. adcq %r11, %r11
  2732. adcq %r12, %r12
  2733. addq %r10, %r9
  2734. adcq %r11, %rcx
  2735. adcq %r12, %r8
  2736. movq %r9, 136(%rdi)
        # Column 18
  2737. # A[3] * A[15]
  2738. movq 120(%rsi), %rax
  2739. mulq 24(%rsi)
  2740. xorq %r9, %r9
  2741. xorq %r12, %r12
  2742. movq %rax, %r10
  2743. movq %rdx, %r11
  2744. # A[4] * A[14]
  2745. movq 112(%rsi), %rax
  2746. mulq 32(%rsi)
  2747. addq %rax, %r10
  2748. adcq %rdx, %r11
  2749. adcq $0x00, %r12
  2750. # A[5] * A[13]
  2751. movq 104(%rsi), %rax
  2752. mulq 40(%rsi)
  2753. addq %rax, %r10
  2754. adcq %rdx, %r11
  2755. adcq $0x00, %r12
  2756. # A[6] * A[12]
  2757. movq 96(%rsi), %rax
  2758. mulq 48(%rsi)
  2759. addq %rax, %r10
  2760. adcq %rdx, %r11
  2761. adcq $0x00, %r12
  2762. # A[7] * A[11]
  2763. movq 88(%rsi), %rax
  2764. mulq 56(%rsi)
  2765. addq %rax, %r10
  2766. adcq %rdx, %r11
  2767. adcq $0x00, %r12
  2768. # A[8] * A[10]
  2769. movq 80(%rsi), %rax
  2770. mulq 64(%rsi)
  2771. addq %rax, %r10
  2772. adcq %rdx, %r11
  2773. adcq $0x00, %r12
  2774. # A[9] * A[9]
  2775. movq 72(%rsi), %rax
  2776. mulq %rax
  2777. addq %r10, %r10
  2778. adcq %r11, %r11
  2779. adcq %r12, %r12
  2780. addq %rax, %r10
  2781. adcq %rdx, %r11
  2782. adcq $0x00, %r12
  2783. addq %r10, %rcx
  2784. adcq %r11, %r8
  2785. adcq %r12, %r9
  2786. movq %rcx, 144(%rdi)
        # Column 19
  2787. # A[4] * A[15]
  2788. movq 120(%rsi), %rax
  2789. mulq 32(%rsi)
  2790. xorq %rcx, %rcx
  2791. xorq %r12, %r12
  2792. movq %rax, %r10
  2793. movq %rdx, %r11
  2794. # A[5] * A[14]
  2795. movq 112(%rsi), %rax
  2796. mulq 40(%rsi)
  2797. addq %rax, %r10
  2798. adcq %rdx, %r11
  2799. adcq $0x00, %r12
  2800. # A[6] * A[13]
  2801. movq 104(%rsi), %rax
  2802. mulq 48(%rsi)
  2803. addq %rax, %r10
  2804. adcq %rdx, %r11
  2805. adcq $0x00, %r12
  2806. # A[7] * A[12]
  2807. movq 96(%rsi), %rax
  2808. mulq 56(%rsi)
  2809. addq %rax, %r10
  2810. adcq %rdx, %r11
  2811. adcq $0x00, %r12
  2812. # A[8] * A[11]
  2813. movq 88(%rsi), %rax
  2814. mulq 64(%rsi)
  2815. addq %rax, %r10
  2816. adcq %rdx, %r11
  2817. adcq $0x00, %r12
  2818. # A[9] * A[10]
  2819. movq 80(%rsi), %rax
  2820. mulq 72(%rsi)
  2821. addq %rax, %r10
  2822. adcq %rdx, %r11
  2823. adcq $0x00, %r12
  2824. addq %r10, %r10
  2825. adcq %r11, %r11
  2826. adcq %r12, %r12
  2827. addq %r10, %r8
  2828. adcq %r11, %r9
  2829. adcq %r12, %rcx
  2830. movq %r8, 152(%rdi)
        # Column 20
  2831. # A[5] * A[15]
  2832. movq 120(%rsi), %rax
  2833. mulq 40(%rsi)
  2834. xorq %r8, %r8
  2835. xorq %r12, %r12
  2836. movq %rax, %r10
  2837. movq %rdx, %r11
  2838. # A[6] * A[14]
  2839. movq 112(%rsi), %rax
  2840. mulq 48(%rsi)
  2841. addq %rax, %r10
  2842. adcq %rdx, %r11
  2843. adcq $0x00, %r12
  2844. # A[7] * A[13]
  2845. movq 104(%rsi), %rax
  2846. mulq 56(%rsi)
  2847. addq %rax, %r10
  2848. adcq %rdx, %r11
  2849. adcq $0x00, %r12
  2850. # A[8] * A[12]
  2851. movq 96(%rsi), %rax
  2852. mulq 64(%rsi)
  2853. addq %rax, %r10
  2854. adcq %rdx, %r11
  2855. adcq $0x00, %r12
  2856. # A[9] * A[11]
  2857. movq 88(%rsi), %rax
  2858. mulq 72(%rsi)
  2859. addq %rax, %r10
  2860. adcq %rdx, %r11
  2861. adcq $0x00, %r12
  2862. # A[10] * A[10]
  2863. movq 80(%rsi), %rax
  2864. mulq %rax
  2865. addq %r10, %r10
  2866. adcq %r11, %r11
  2867. adcq %r12, %r12
  2868. addq %rax, %r10
  2869. adcq %rdx, %r11
  2870. adcq $0x00, %r12
  2871. addq %r10, %r9
  2872. adcq %r11, %rcx
  2873. adcq %r12, %r8
  2874. movq %r9, 160(%rdi)
        # Column 21
  2875. # A[6] * A[15]
  2876. movq 120(%rsi), %rax
  2877. mulq 48(%rsi)
  2878. xorq %r9, %r9
  2879. xorq %r12, %r12
  2880. movq %rax, %r10
  2881. movq %rdx, %r11
  2882. # A[7] * A[14]
  2883. movq 112(%rsi), %rax
  2884. mulq 56(%rsi)
  2885. addq %rax, %r10
  2886. adcq %rdx, %r11
  2887. adcq $0x00, %r12
  2888. # A[8] * A[13]
  2889. movq 104(%rsi), %rax
  2890. mulq 64(%rsi)
  2891. addq %rax, %r10
  2892. adcq %rdx, %r11
  2893. adcq $0x00, %r12
  2894. # A[9] * A[12]
  2895. movq 96(%rsi), %rax
  2896. mulq 72(%rsi)
  2897. addq %rax, %r10
  2898. adcq %rdx, %r11
  2899. adcq $0x00, %r12
  2900. # A[10] * A[11]
  2901. movq 88(%rsi), %rax
  2902. mulq 80(%rsi)
  2903. addq %rax, %r10
  2904. adcq %rdx, %r11
  2905. adcq $0x00, %r12
  2906. addq %r10, %r10
  2907. adcq %r11, %r11
  2908. adcq %r12, %r12
  2909. addq %r10, %rcx
  2910. adcq %r11, %r8
  2911. adcq %r12, %r9
  2912. movq %rcx, 168(%rdi)
        # Column 22
  2913. # A[7] * A[15]
  2914. movq 120(%rsi), %rax
  2915. mulq 56(%rsi)
  2916. xorq %rcx, %rcx
  2917. xorq %r12, %r12
  2918. movq %rax, %r10
  2919. movq %rdx, %r11
  2920. # A[8] * A[14]
  2921. movq 112(%rsi), %rax
  2922. mulq 64(%rsi)
  2923. addq %rax, %r10
  2924. adcq %rdx, %r11
  2925. adcq $0x00, %r12
  2926. # A[9] * A[13]
  2927. movq 104(%rsi), %rax
  2928. mulq 72(%rsi)
  2929. addq %rax, %r10
  2930. adcq %rdx, %r11
  2931. adcq $0x00, %r12
  2932. # A[10] * A[12]
  2933. movq 96(%rsi), %rax
  2934. mulq 80(%rsi)
  2935. addq %rax, %r10
  2936. adcq %rdx, %r11
  2937. adcq $0x00, %r12
  2938. # A[11] * A[11]
  2939. movq 88(%rsi), %rax
  2940. mulq %rax
  2941. addq %r10, %r10
  2942. adcq %r11, %r11
  2943. adcq %r12, %r12
  2944. addq %rax, %r10
  2945. adcq %rdx, %r11
  2946. adcq $0x00, %r12
  2947. addq %r10, %r8
  2948. adcq %r11, %r9
  2949. adcq %r12, %rcx
  2950. movq %r8, 176(%rdi)
        # Column 23
  2951. # A[8] * A[15]
  2952. movq 120(%rsi), %rax
  2953. mulq 64(%rsi)
  2954. xorq %r8, %r8
  2955. xorq %r12, %r12
  2956. movq %rax, %r10
  2957. movq %rdx, %r11
  2958. # A[9] * A[14]
  2959. movq 112(%rsi), %rax
  2960. mulq 72(%rsi)
  2961. addq %rax, %r10
  2962. adcq %rdx, %r11
  2963. adcq $0x00, %r12
  2964. # A[10] * A[13]
  2965. movq 104(%rsi), %rax
  2966. mulq 80(%rsi)
  2967. addq %rax, %r10
  2968. adcq %rdx, %r11
  2969. adcq $0x00, %r12
  2970. # A[11] * A[12]
  2971. movq 96(%rsi), %rax
  2972. mulq 88(%rsi)
  2973. addq %rax, %r10
  2974. adcq %rdx, %r11
  2975. adcq $0x00, %r12
  2976. addq %r10, %r10
  2977. adcq %r11, %r11
  2978. adcq %r12, %r12
  2979. addq %r10, %r9
  2980. adcq %r11, %rcx
  2981. adcq %r12, %r8
  2982. movq %r9, 184(%rdi)
        # Column 24
  2983. # A[9] * A[15]
  2984. movq 120(%rsi), %rax
  2985. mulq 72(%rsi)
  2986. xorq %r9, %r9
  2987. xorq %r12, %r12
  2988. movq %rax, %r10
  2989. movq %rdx, %r11
  2990. # A[10] * A[14]
  2991. movq 112(%rsi), %rax
  2992. mulq 80(%rsi)
  2993. addq %rax, %r10
  2994. adcq %rdx, %r11
  2995. adcq $0x00, %r12
  2996. # A[11] * A[13]
  2997. movq 104(%rsi), %rax
  2998. mulq 88(%rsi)
  2999. addq %rax, %r10
  3000. adcq %rdx, %r11
  3001. adcq $0x00, %r12
  3002. # A[12] * A[12]
  3003. movq 96(%rsi), %rax
  3004. mulq %rax
  3005. addq %r10, %r10
  3006. adcq %r11, %r11
  3007. adcq %r12, %r12
  3008. addq %rax, %r10
  3009. adcq %rdx, %r11
  3010. adcq $0x00, %r12
  3011. addq %r10, %rcx
  3012. adcq %r11, %r8
  3013. adcq %r12, %r9
  3014. movq %rcx, 192(%rdi)
        # Column 25
  3015. # A[10] * A[15]
  3016. movq 120(%rsi), %rax
  3017. mulq 80(%rsi)
  3018. xorq %rcx, %rcx
  3019. xorq %r12, %r12
  3020. movq %rax, %r10
  3021. movq %rdx, %r11
  3022. # A[11] * A[14]
  3023. movq 112(%rsi), %rax
  3024. mulq 88(%rsi)
  3025. addq %rax, %r10
  3026. adcq %rdx, %r11
  3027. adcq $0x00, %r12
  3028. # A[12] * A[13]
  3029. movq 104(%rsi), %rax
  3030. mulq 96(%rsi)
  3031. addq %rax, %r10
  3032. adcq %rdx, %r11
  3033. adcq $0x00, %r12
  3034. addq %r10, %r10
  3035. adcq %r11, %r11
  3036. adcq %r12, %r12
  3037. addq %r10, %r8
  3038. adcq %r11, %r9
  3039. adcq %r12, %rcx
  3040. movq %r8, 200(%rdi)
        # Column 26 (columns 26..29 again add each cross product to the
        # accumulator twice instead of going through %r10, %r11, %r12).
  3041. # A[11] * A[15]
  3042. movq 120(%rsi), %rax
  3043. mulq 88(%rsi)
  3044. xorq %r8, %r8
  3045. addq %rax, %r9
  3046. adcq %rdx, %rcx
  3047. adcq $0x00, %r8
  3048. addq %rax, %r9
  3049. adcq %rdx, %rcx
  3050. adcq $0x00, %r8
  3051. # A[12] * A[14]
  3052. movq 112(%rsi), %rax
  3053. mulq 96(%rsi)
  3054. addq %rax, %r9
  3055. adcq %rdx, %rcx
  3056. adcq $0x00, %r8
  3057. addq %rax, %r9
  3058. adcq %rdx, %rcx
  3059. adcq $0x00, %r8
  3060. # A[13] * A[13]
  3061. movq 104(%rsi), %rax
  3062. mulq %rax
  3063. addq %rax, %r9
  3064. adcq %rdx, %rcx
  3065. adcq $0x00, %r8
  3066. movq %r9, 208(%rdi)
        # Column 27
  3067. # A[12] * A[15]
  3068. movq 120(%rsi), %rax
  3069. mulq 96(%rsi)
  3070. xorq %r9, %r9
  3071. addq %rax, %rcx
  3072. adcq %rdx, %r8
  3073. adcq $0x00, %r9
  3074. addq %rax, %rcx
  3075. adcq %rdx, %r8
  3076. adcq $0x00, %r9
  3077. # A[13] * A[14]
  3078. movq 112(%rsi), %rax
  3079. mulq 104(%rsi)
  3080. addq %rax, %rcx
  3081. adcq %rdx, %r8
  3082. adcq $0x00, %r9
  3083. addq %rax, %rcx
  3084. adcq %rdx, %r8
  3085. adcq $0x00, %r9
  3086. movq %rcx, 216(%rdi)
        # Column 28
  3087. # A[13] * A[15]
  3088. movq 120(%rsi), %rax
  3089. mulq 104(%rsi)
  3090. xorq %rcx, %rcx
  3091. addq %rax, %r8
  3092. adcq %rdx, %r9
  3093. adcq $0x00, %rcx
  3094. addq %rax, %r8
  3095. adcq %rdx, %r9
  3096. adcq $0x00, %rcx
  3097. # A[14] * A[14]
  3098. movq 112(%rsi), %rax
  3099. mulq %rax
  3100. addq %rax, %r8
  3101. adcq %rdx, %r9
  3102. adcq $0x00, %rcx
  3103. movq %r8, 224(%rdi)
        # Column 29
  3104. # A[14] * A[15]
  3105. movq 120(%rsi), %rax
  3106. mulq 112(%rsi)
  3107. xorq %r8, %r8
  3108. addq %rax, %r9
  3109. adcq %rdx, %rcx
  3110. adcq $0x00, %r8
  3111. addq %rax, %r9
  3112. adcq %rdx, %rcx
  3113. adcq $0x00, %r8
  3114. movq %r9, 232(%rdi)
        # Column 30: only A[15] * A[15] remains; the two accumulator words
        # left after adding it are stored as result words 30 and 31.
  3115. # A[15] * A[15]
  3116. movq 120(%rsi), %rax
  3117. mulq %rax
  3118. addq %rax, %rcx
  3119. adcq %rdx, %r8
  3120. movq %rcx, 240(%rdi)
  3121. movq %r8, 248(%rdi)
        # Copy result words 0..15 from the stack buffer to r, four at a time.
  3122. movq (%rsp), %rax
  3123. movq 8(%rsp), %rdx
  3124. movq 16(%rsp), %r10
  3125. movq 24(%rsp), %r11
  3126. movq %rax, (%rdi)
  3127. movq %rdx, 8(%rdi)
  3128. movq %r10, 16(%rdi)
  3129. movq %r11, 24(%rdi)
  3130. movq 32(%rsp), %rax
  3131. movq 40(%rsp), %rdx
  3132. movq 48(%rsp), %r10
  3133. movq 56(%rsp), %r11
  3134. movq %rax, 32(%rdi)
  3135. movq %rdx, 40(%rdi)
  3136. movq %r10, 48(%rdi)
  3137. movq %r11, 56(%rdi)
  3138. movq 64(%rsp), %rax
  3139. movq 72(%rsp), %rdx
  3140. movq 80(%rsp), %r10
  3141. movq 88(%rsp), %r11
  3142. movq %rax, 64(%rdi)
  3143. movq %rdx, 72(%rdi)
  3144. movq %r10, 80(%rdi)
  3145. movq %r11, 88(%rdi)
  3146. movq 96(%rsp), %rax
  3147. movq 104(%rsp), %rdx
  3148. movq 112(%rsp), %r10
  3149. movq 120(%rsp), %r11
  3150. movq %rax, 96(%rdi)
  3151. movq %rdx, 104(%rdi)
  3152. movq %r10, 112(%rdi)
  3153. movq %r11, 120(%rdi)
        # Release the stack buffer and restore %r12 before returning.
  3154. addq $0x80, %rsp
  3155. popq %r12
  3156. repz retq
  3157. #ifndef __APPLE__
  3158. .size sp_2048_sqr_16,.-sp_2048_sqr_16
  3159. #endif /* __APPLE__ */
  3160. #ifdef HAVE_INTEL_AVX2
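  3161. /* Multiply a and b into r. (r = a * b) Uses mulx (BMI2) and adcx/adox (ADX).
  3162. *
  3163. * r Result of multiplication. If r is the same pointer as a or b, the low 16 words are accumulated in a stack temporary.
  3164. * a First number to multiply.
  3165. * b Second number to multiply.
  3166. */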
  3167. #ifndef __APPLE__
  3168. .text
  3169. .globl sp_2048_mul_avx2_16
  3170. .type sp_2048_mul_avx2_16,@function
  3171. .align 16
  3172. sp_2048_mul_avx2_16:
  3173. #else
  3174. .section __TEXT,__text
  3175. .globl _sp_2048_mul_avx2_16
  3176. .p2align 4
  3177. _sp_2048_mul_avx2_16:
  3178. #endif /* __APPLE__ */
  3179. pushq %rbx
  3180. pushq %rbp
  3181. pushq %r12
  3182. pushq %r13
  3183. pushq %r14
  3184. movq %rdx, %rbp
  3185. subq $0x80, %rsp
  3186. cmpq %rdi, %rsi
  3187. movq %rsp, %rbx
  3188. cmovne %rdi, %rbx
  3189. cmpq %rdi, %rbp
  3190. cmove %rsp, %rbx
  3191. xorq %r14, %r14
  3192. movq (%rsi), %rdx
  3193. # A[0] * B[0]
  3194. mulx (%rbp), %r8, %r9
  3195. # A[0] * B[1]
  3196. mulx 8(%rbp), %rax, %r10
  3197. movq %r8, (%rbx)
  3198. adcxq %rax, %r9
  3199. # A[0] * B[2]
  3200. mulx 16(%rbp), %rax, %r11
  3201. movq %r9, 8(%rbx)
  3202. adcxq %rax, %r10
  3203. # A[0] * B[3]
  3204. mulx 24(%rbp), %rax, %r12
  3205. movq %r10, 16(%rbx)
  3206. adcxq %rax, %r11
  3207. movq %r11, 24(%rbx)
  3208. # A[0] * B[4]
  3209. mulx 32(%rbp), %rax, %r8
  3210. adcxq %rax, %r12
  3211. # A[0] * B[5]
  3212. mulx 40(%rbp), %rax, %r9
  3213. movq %r12, 32(%rbx)
  3214. adcxq %rax, %r8
  3215. # A[0] * B[6]
  3216. mulx 48(%rbp), %rax, %r10
  3217. movq %r8, 40(%rbx)
  3218. adcxq %rax, %r9
  3219. # A[0] * B[7]
  3220. mulx 56(%rbp), %rax, %r11
  3221. movq %r9, 48(%rbx)
  3222. adcxq %rax, %r10
  3223. movq %r10, 56(%rbx)
  3224. # A[0] * B[8]
  3225. mulx 64(%rbp), %rax, %r12
  3226. adcxq %rax, %r11
  3227. # A[0] * B[9]
  3228. mulx 72(%rbp), %rax, %r8
  3229. movq %r11, 64(%rbx)
  3230. adcxq %rax, %r12
  3231. # A[0] * B[10]
  3232. mulx 80(%rbp), %rax, %r9
  3233. movq %r12, 72(%rbx)
  3234. adcxq %rax, %r8
  3235. # A[0] * B[11]
  3236. mulx 88(%rbp), %rax, %r10
  3237. movq %r8, 80(%rbx)
  3238. adcxq %rax, %r9
  3239. movq %r9, 88(%rbx)
  3240. # A[0] * B[12]
  3241. mulx 96(%rbp), %rax, %r11
  3242. adcxq %rax, %r10
  3243. # A[0] * B[13]
  3244. mulx 104(%rbp), %rax, %r12
  3245. movq %r10, 96(%rbx)
  3246. adcxq %rax, %r11
  3247. # A[0] * B[14]
  3248. mulx 112(%rbp), %rax, %r8
  3249. movq %r11, 104(%rbx)
  3250. adcxq %rax, %r12
  3251. # A[0] * B[15]
  3252. mulx 120(%rbp), %rax, %r9
  3253. movq %r12, 112(%rbx)
  3254. adcxq %rax, %r8
  3255. adcxq %r14, %r9
  3256. movq %r14, %r13
  3257. adcxq %r14, %r13
  3258. movq %r8, 120(%rbx)
  3259. movq %r9, 128(%rdi)
  3260. movq 8(%rsi), %rdx
  3261. movq 8(%rbx), %r9
  3262. movq 16(%rbx), %r10
  3263. movq 24(%rbx), %r11
  3264. movq 32(%rbx), %r12
  3265. movq 40(%rbx), %r8
  3266. # A[1] * B[0]
  3267. mulx (%rbp), %rax, %rcx
  3268. adcxq %rax, %r9
  3269. adoxq %rcx, %r10
  3270. # A[1] * B[1]
  3271. mulx 8(%rbp), %rax, %rcx
  3272. movq %r9, 8(%rbx)
  3273. adcxq %rax, %r10
  3274. adoxq %rcx, %r11
  3275. # A[1] * B[2]
  3276. mulx 16(%rbp), %rax, %rcx
  3277. movq %r10, 16(%rbx)
  3278. adcxq %rax, %r11
  3279. adoxq %rcx, %r12
  3280. # A[1] * B[3]
  3281. mulx 24(%rbp), %rax, %rcx
  3282. movq %r11, 24(%rbx)
  3283. adcxq %rax, %r12
  3284. adoxq %rcx, %r8
  3285. movq %r12, 32(%rbx)
  3286. movq 48(%rbx), %r9
  3287. movq 56(%rbx), %r10
  3288. movq 64(%rbx), %r11
  3289. movq 72(%rbx), %r12
  3290. # A[1] * B[4]
  3291. mulx 32(%rbp), %rax, %rcx
  3292. adcxq %rax, %r8
  3293. adoxq %rcx, %r9
  3294. # A[1] * B[5]
  3295. mulx 40(%rbp), %rax, %rcx
  3296. movq %r8, 40(%rbx)
  3297. adcxq %rax, %r9
  3298. adoxq %rcx, %r10
  3299. # A[1] * B[6]
  3300. mulx 48(%rbp), %rax, %rcx
  3301. movq %r9, 48(%rbx)
  3302. adcxq %rax, %r10
  3303. adoxq %rcx, %r11
  3304. # A[1] * B[7]
  3305. mulx 56(%rbp), %rax, %rcx
  3306. movq %r10, 56(%rbx)
  3307. adcxq %rax, %r11
  3308. adoxq %rcx, %r12
  3309. movq %r11, 64(%rbx)
  3310. movq 80(%rbx), %r8
  3311. movq 88(%rbx), %r9
  3312. movq 96(%rbx), %r10
  3313. movq 104(%rbx), %r11
  3314. # A[1] * B[8]
  3315. mulx 64(%rbp), %rax, %rcx
  3316. adcxq %rax, %r12
  3317. adoxq %rcx, %r8
  3318. # A[1] * B[9]
  3319. mulx 72(%rbp), %rax, %rcx
  3320. movq %r12, 72(%rbx)
  3321. adcxq %rax, %r8
  3322. adoxq %rcx, %r9
  3323. # A[1] * B[10]
  3324. mulx 80(%rbp), %rax, %rcx
  3325. movq %r8, 80(%rbx)
  3326. adcxq %rax, %r9
  3327. adoxq %rcx, %r10
  3328. # A[1] * B[11]
  3329. mulx 88(%rbp), %rax, %rcx
  3330. movq %r9, 88(%rbx)
  3331. adcxq %rax, %r10
  3332. adoxq %rcx, %r11
  3333. movq %r10, 96(%rbx)
  3334. movq 112(%rbx), %r12
  3335. movq 120(%rbx), %r8
  3336. movq 128(%rdi), %r9
  3337. # A[1] * B[12]
  3338. mulx 96(%rbp), %rax, %rcx
  3339. adcxq %rax, %r11
  3340. adoxq %rcx, %r12
  3341. # A[1] * B[13]
  3342. mulx 104(%rbp), %rax, %rcx
  3343. movq %r11, 104(%rbx)
  3344. adcxq %rax, %r12
  3345. adoxq %rcx, %r8
  3346. # A[1] * B[14]
  3347. mulx 112(%rbp), %rax, %rcx
  3348. movq %r12, 112(%rbx)
  3349. adcxq %rax, %r8
  3350. adoxq %rcx, %r9
  3351. # A[1] * B[15]
  3352. mulx 120(%rbp), %rax, %rcx
  3353. movq %r8, 120(%rbx)
  3354. movq %r14, %r10
  3355. adcxq %rax, %r9
  3356. adoxq %rcx, %r10
  3357. adcxq %r13, %r10
  3358. movq %r14, %r13
  3359. adoxq %r14, %r13
  3360. adcxq %r14, %r13
  3361. movq %r9, 128(%rdi)
  3362. movq %r10, 136(%rdi)
  3363. movq 16(%rsi), %rdx
  3364. movq 16(%rbx), %r10
  3365. movq 24(%rbx), %r11
  3366. movq 32(%rbx), %r12
  3367. movq 40(%rbx), %r8
  3368. movq 48(%rbx), %r9
  3369. # A[2] * B[0]
  3370. mulx (%rbp), %rax, %rcx
  3371. adcxq %rax, %r10
  3372. adoxq %rcx, %r11
  3373. # A[2] * B[1]
  3374. mulx 8(%rbp), %rax, %rcx
  3375. movq %r10, 16(%rbx)
  3376. adcxq %rax, %r11
  3377. adoxq %rcx, %r12
  3378. # A[2] * B[2]
  3379. mulx 16(%rbp), %rax, %rcx
  3380. movq %r11, 24(%rbx)
  3381. adcxq %rax, %r12
  3382. adoxq %rcx, %r8
  3383. # A[2] * B[3]
  3384. mulx 24(%rbp), %rax, %rcx
  3385. movq %r12, 32(%rbx)
  3386. adcxq %rax, %r8
  3387. adoxq %rcx, %r9
  3388. movq %r8, 40(%rbx)
  3389. movq 56(%rbx), %r10
  3390. movq 64(%rbx), %r11
  3391. movq 72(%rbx), %r12
  3392. movq 80(%rbx), %r8
  3393. # A[2] * B[4]
  3394. mulx 32(%rbp), %rax, %rcx
  3395. adcxq %rax, %r9
  3396. adoxq %rcx, %r10
  3397. # A[2] * B[5]
  3398. mulx 40(%rbp), %rax, %rcx
  3399. movq %r9, 48(%rbx)
  3400. adcxq %rax, %r10
  3401. adoxq %rcx, %r11
  3402. # A[2] * B[6]
  3403. mulx 48(%rbp), %rax, %rcx
  3404. movq %r10, 56(%rbx)
  3405. adcxq %rax, %r11
  3406. adoxq %rcx, %r12
  3407. # A[2] * B[7]
  3408. mulx 56(%rbp), %rax, %rcx
  3409. movq %r11, 64(%rbx)
  3410. adcxq %rax, %r12
  3411. adoxq %rcx, %r8
  3412. movq %r12, 72(%rbx)
  3413. movq 88(%rbx), %r9
  3414. movq 96(%rbx), %r10
  3415. movq 104(%rbx), %r11
  3416. movq 112(%rbx), %r12
  3417. # A[2] * B[8]
  3418. mulx 64(%rbp), %rax, %rcx
  3419. adcxq %rax, %r8
  3420. adoxq %rcx, %r9
  3421. # A[2] * B[9]
  3422. mulx 72(%rbp), %rax, %rcx
  3423. movq %r8, 80(%rbx)
  3424. adcxq %rax, %r9
  3425. adoxq %rcx, %r10
  3426. # A[2] * B[10]
  3427. mulx 80(%rbp), %rax, %rcx
  3428. movq %r9, 88(%rbx)
  3429. adcxq %rax, %r10
  3430. adoxq %rcx, %r11
  3431. # A[2] * B[11]
  3432. mulx 88(%rbp), %rax, %rcx
  3433. movq %r10, 96(%rbx)
  3434. adcxq %rax, %r11
  3435. adoxq %rcx, %r12
  3436. movq %r11, 104(%rbx)
  3437. movq 120(%rbx), %r8
  3438. movq 128(%rdi), %r9
  3439. movq 136(%rdi), %r10
  3440. # A[2] * B[12]
  3441. mulx 96(%rbp), %rax, %rcx
  3442. adcxq %rax, %r12
  3443. adoxq %rcx, %r8
  3444. # A[2] * B[13]
  3445. mulx 104(%rbp), %rax, %rcx
  3446. movq %r12, 112(%rbx)
  3447. adcxq %rax, %r8
  3448. adoxq %rcx, %r9
  3449. # A[2] * B[14]
  3450. mulx 112(%rbp), %rax, %rcx
  3451. movq %r8, 120(%rbx)
  3452. adcxq %rax, %r9
  3453. adoxq %rcx, %r10
  3454. # A[2] * B[15]
  3455. mulx 120(%rbp), %rax, %rcx
  3456. movq %r9, 128(%rdi)
  3457. movq %r14, %r11
  3458. adcxq %rax, %r10
  3459. adoxq %rcx, %r11
  3460. adcxq %r13, %r11
  3461. movq %r14, %r13
  3462. adoxq %r14, %r13
  3463. adcxq %r14, %r13
  3464. movq %r10, 136(%rdi)
  3465. movq %r11, 144(%rdi)
  3466. movq 24(%rsi), %rdx
  3467. movq 24(%rbx), %r11
  3468. movq 32(%rbx), %r12
  3469. movq 40(%rbx), %r8
  3470. movq 48(%rbx), %r9
  3471. movq 56(%rbx), %r10
  3472. # A[3] * B[0]
  3473. mulx (%rbp), %rax, %rcx
  3474. adcxq %rax, %r11
  3475. adoxq %rcx, %r12
  3476. # A[3] * B[1]
  3477. mulx 8(%rbp), %rax, %rcx
  3478. movq %r11, 24(%rbx)
  3479. adcxq %rax, %r12
  3480. adoxq %rcx, %r8
  3481. # A[3] * B[2]
  3482. mulx 16(%rbp), %rax, %rcx
  3483. movq %r12, 32(%rbx)
  3484. adcxq %rax, %r8
  3485. adoxq %rcx, %r9
  3486. # A[3] * B[3]
  3487. mulx 24(%rbp), %rax, %rcx
  3488. movq %r8, 40(%rbx)
  3489. adcxq %rax, %r9
  3490. adoxq %rcx, %r10
  3491. movq %r9, 48(%rbx)
  3492. movq 64(%rbx), %r11
  3493. movq 72(%rbx), %r12
  3494. movq 80(%rbx), %r8
  3495. movq 88(%rbx), %r9
  3496. # A[3] * B[4]
  3497. mulx 32(%rbp), %rax, %rcx
  3498. adcxq %rax, %r10
  3499. adoxq %rcx, %r11
  3500. # A[3] * B[5]
  3501. mulx 40(%rbp), %rax, %rcx
  3502. movq %r10, 56(%rbx)
  3503. adcxq %rax, %r11
  3504. adoxq %rcx, %r12
  3505. # A[3] * B[6]
  3506. mulx 48(%rbp), %rax, %rcx
  3507. movq %r11, 64(%rbx)
  3508. adcxq %rax, %r12
  3509. adoxq %rcx, %r8
  3510. # A[3] * B[7]
  3511. mulx 56(%rbp), %rax, %rcx
  3512. movq %r12, 72(%rbx)
  3513. adcxq %rax, %r8
  3514. adoxq %rcx, %r9
  3515. movq %r8, 80(%rbx)
  3516. movq 96(%rbx), %r10
  3517. movq 104(%rbx), %r11
  3518. movq 112(%rbx), %r12
  3519. movq 120(%rbx), %r8
  3520. # A[3] * B[8]
  3521. mulx 64(%rbp), %rax, %rcx
  3522. adcxq %rax, %r9
  3523. adoxq %rcx, %r10
  3524. # A[3] * B[9]
  3525. mulx 72(%rbp), %rax, %rcx
  3526. movq %r9, 88(%rbx)
  3527. adcxq %rax, %r10
  3528. adoxq %rcx, %r11
  3529. # A[3] * B[10]
  3530. mulx 80(%rbp), %rax, %rcx
  3531. movq %r10, 96(%rbx)
  3532. adcxq %rax, %r11
  3533. adoxq %rcx, %r12
  3534. # A[3] * B[11]
  3535. mulx 88(%rbp), %rax, %rcx
  3536. movq %r11, 104(%rbx)
  3537. adcxq %rax, %r12
  3538. adoxq %rcx, %r8
  3539. movq %r12, 112(%rbx)
  3540. movq 128(%rdi), %r9
  3541. movq 136(%rdi), %r10
  3542. movq 144(%rdi), %r11
  3543. # A[3] * B[12]
  3544. mulx 96(%rbp), %rax, %rcx
  3545. adcxq %rax, %r8
  3546. adoxq %rcx, %r9
  3547. # A[3] * B[13]
  3548. mulx 104(%rbp), %rax, %rcx
  3549. movq %r8, 120(%rbx)
  3550. adcxq %rax, %r9
  3551. adoxq %rcx, %r10
  3552. # A[3] * B[14]
  3553. mulx 112(%rbp), %rax, %rcx
  3554. movq %r9, 128(%rdi)
  3555. adcxq %rax, %r10
  3556. adoxq %rcx, %r11
  3557. # A[3] * B[15]
  3558. mulx 120(%rbp), %rax, %rcx
  3559. movq %r10, 136(%rdi)
  3560. movq %r14, %r12
  3561. adcxq %rax, %r11
  3562. adoxq %rcx, %r12
  3563. adcxq %r13, %r12
  3564. movq %r14, %r13
  3565. adoxq %r14, %r13
  3566. adcxq %r14, %r13
  3567. movq %r11, 144(%rdi)
  3568. movq %r12, 152(%rdi)
  3569. movq 32(%rsi), %rdx
  3570. movq 32(%rbx), %r12
  3571. movq 40(%rbx), %r8
  3572. movq 48(%rbx), %r9
  3573. movq 56(%rbx), %r10
  3574. movq 64(%rbx), %r11
  3575. # A[4] * B[0]
  3576. mulx (%rbp), %rax, %rcx
  3577. adcxq %rax, %r12
  3578. adoxq %rcx, %r8
  3579. # A[4] * B[1]
  3580. mulx 8(%rbp), %rax, %rcx
  3581. movq %r12, 32(%rbx)
  3582. adcxq %rax, %r8
  3583. adoxq %rcx, %r9
  3584. # A[4] * B[2]
  3585. mulx 16(%rbp), %rax, %rcx
  3586. movq %r8, 40(%rbx)
  3587. adcxq %rax, %r9
  3588. adoxq %rcx, %r10
  3589. # A[4] * B[3]
  3590. mulx 24(%rbp), %rax, %rcx
  3591. movq %r9, 48(%rbx)
  3592. adcxq %rax, %r10
  3593. adoxq %rcx, %r11
  3594. movq %r10, 56(%rbx)
  3595. movq 72(%rbx), %r12
  3596. movq 80(%rbx), %r8
  3597. movq 88(%rbx), %r9
  3598. movq 96(%rbx), %r10
  3599. # A[4] * B[4]
  3600. mulx 32(%rbp), %rax, %rcx
  3601. adcxq %rax, %r11
  3602. adoxq %rcx, %r12
  3603. # A[4] * B[5]
  3604. mulx 40(%rbp), %rax, %rcx
  3605. movq %r11, 64(%rbx)
  3606. adcxq %rax, %r12
  3607. adoxq %rcx, %r8
  3608. # A[4] * B[6]
  3609. mulx 48(%rbp), %rax, %rcx
  3610. movq %r12, 72(%rbx)
  3611. adcxq %rax, %r8
  3612. adoxq %rcx, %r9
  3613. # A[4] * B[7]
  3614. mulx 56(%rbp), %rax, %rcx
  3615. movq %r8, 80(%rbx)
  3616. adcxq %rax, %r9
  3617. adoxq %rcx, %r10
  3618. movq %r9, 88(%rbx)
  3619. movq 104(%rbx), %r11
  3620. movq 112(%rbx), %r12
  3621. movq 120(%rbx), %r8
  3622. movq 128(%rdi), %r9
  3623. # A[4] * B[8]
  3624. mulx 64(%rbp), %rax, %rcx
  3625. adcxq %rax, %r10
  3626. adoxq %rcx, %r11
  3627. # A[4] * B[9]
  3628. mulx 72(%rbp), %rax, %rcx
  3629. movq %r10, 96(%rbx)
  3630. adcxq %rax, %r11
  3631. adoxq %rcx, %r12
  3632. # A[4] * B[10]
  3633. mulx 80(%rbp), %rax, %rcx
  3634. movq %r11, 104(%rbx)
  3635. adcxq %rax, %r12
  3636. adoxq %rcx, %r8
  3637. # A[4] * B[11]
  3638. mulx 88(%rbp), %rax, %rcx
  3639. movq %r12, 112(%rbx)
  3640. adcxq %rax, %r8
  3641. adoxq %rcx, %r9
  3642. movq %r8, 120(%rbx)
  3643. movq 136(%rdi), %r10
  3644. movq 144(%rdi), %r11
  3645. movq 152(%rdi), %r12
  3646. # A[4] * B[12]
  3647. mulx 96(%rbp), %rax, %rcx
  3648. adcxq %rax, %r9
  3649. adoxq %rcx, %r10
  3650. # A[4] * B[13]
  3651. mulx 104(%rbp), %rax, %rcx
  3652. movq %r9, 128(%rdi)
  3653. adcxq %rax, %r10
  3654. adoxq %rcx, %r11
  3655. # A[4] * B[14]
  3656. mulx 112(%rbp), %rax, %rcx
  3657. movq %r10, 136(%rdi)
  3658. adcxq %rax, %r11
  3659. adoxq %rcx, %r12
  3660. # A[4] * B[15]
  3661. mulx 120(%rbp), %rax, %rcx
  3662. movq %r11, 144(%rdi)
  3663. movq %r14, %r8
  3664. adcxq %rax, %r12
  3665. adoxq %rcx, %r8
  3666. adcxq %r13, %r8
  3667. movq %r14, %r13
  3668. adoxq %r14, %r13
  3669. adcxq %r14, %r13
  3670. movq %r12, 152(%rdi)
  3671. movq %r8, 160(%rdi)
  3672. movq 40(%rsi), %rdx
  3673. movq 40(%rbx), %r8
  3674. movq 48(%rbx), %r9
  3675. movq 56(%rbx), %r10
  3676. movq 64(%rbx), %r11
  3677. movq 72(%rbx), %r12
  3678. # A[5] * B[0]
  3679. mulx (%rbp), %rax, %rcx
  3680. adcxq %rax, %r8
  3681. adoxq %rcx, %r9
  3682. # A[5] * B[1]
  3683. mulx 8(%rbp), %rax, %rcx
  3684. movq %r8, 40(%rbx)
  3685. adcxq %rax, %r9
  3686. adoxq %rcx, %r10
  3687. # A[5] * B[2]
  3688. mulx 16(%rbp), %rax, %rcx
  3689. movq %r9, 48(%rbx)
  3690. adcxq %rax, %r10
  3691. adoxq %rcx, %r11
  3692. # A[5] * B[3]
  3693. mulx 24(%rbp), %rax, %rcx
  3694. movq %r10, 56(%rbx)
  3695. adcxq %rax, %r11
  3696. adoxq %rcx, %r12
  3697. movq %r11, 64(%rbx)
  3698. movq 80(%rbx), %r8
  3699. movq 88(%rbx), %r9
  3700. movq 96(%rbx), %r10
  3701. movq 104(%rbx), %r11
  3702. # A[5] * B[4]
  3703. mulx 32(%rbp), %rax, %rcx
  3704. adcxq %rax, %r12
  3705. adoxq %rcx, %r8
  3706. # A[5] * B[5]
  3707. mulx 40(%rbp), %rax, %rcx
  3708. movq %r12, 72(%rbx)
  3709. adcxq %rax, %r8
  3710. adoxq %rcx, %r9
  3711. # A[5] * B[6]
  3712. mulx 48(%rbp), %rax, %rcx
  3713. movq %r8, 80(%rbx)
  3714. adcxq %rax, %r9
  3715. adoxq %rcx, %r10
  3716. # A[5] * B[7]
  3717. mulx 56(%rbp), %rax, %rcx
  3718. movq %r9, 88(%rbx)
  3719. adcxq %rax, %r10
  3720. adoxq %rcx, %r11
  3721. movq %r10, 96(%rbx)
  3722. movq 112(%rbx), %r12
  3723. movq 120(%rbx), %r8
  3724. movq 128(%rdi), %r9
  3725. movq 136(%rdi), %r10
  3726. # A[5] * B[8]
  3727. mulx 64(%rbp), %rax, %rcx
  3728. adcxq %rax, %r11
  3729. adoxq %rcx, %r12
  3730. # A[5] * B[9]
  3731. mulx 72(%rbp), %rax, %rcx
  3732. movq %r11, 104(%rbx)
  3733. adcxq %rax, %r12
  3734. adoxq %rcx, %r8
  3735. # A[5] * B[10]
  3736. mulx 80(%rbp), %rax, %rcx
  3737. movq %r12, 112(%rbx)
  3738. adcxq %rax, %r8
  3739. adoxq %rcx, %r9
  3740. # A[5] * B[11]
  3741. mulx 88(%rbp), %rax, %rcx
  3742. movq %r8, 120(%rbx)
  3743. adcxq %rax, %r9
  3744. adoxq %rcx, %r10
  3745. movq %r9, 128(%rdi)
  3746. movq 144(%rdi), %r11
  3747. movq 152(%rdi), %r12
  3748. movq 160(%rdi), %r8
  3749. # A[5] * B[12]
  3750. mulx 96(%rbp), %rax, %rcx
  3751. adcxq %rax, %r10
  3752. adoxq %rcx, %r11
  3753. # A[5] * B[13]
  3754. mulx 104(%rbp), %rax, %rcx
  3755. movq %r10, 136(%rdi)
  3756. adcxq %rax, %r11
  3757. adoxq %rcx, %r12
  3758. # A[5] * B[14]
  3759. mulx 112(%rbp), %rax, %rcx
  3760. movq %r11, 144(%rdi)
  3761. adcxq %rax, %r12
  3762. adoxq %rcx, %r8
  3763. # A[5] * B[15]
  3764. mulx 120(%rbp), %rax, %rcx
  3765. movq %r12, 152(%rdi)
  3766. movq %r14, %r9
  3767. adcxq %rax, %r8
  3768. adoxq %rcx, %r9
  3769. adcxq %r13, %r9
  3770. movq %r14, %r13
  3771. adoxq %r14, %r13
  3772. adcxq %r14, %r13
  3773. movq %r8, 160(%rdi)
  3774. movq %r9, 168(%rdi)
  3775. movq 48(%rsi), %rdx
  3776. movq 48(%rbx), %r9
  3777. movq 56(%rbx), %r10
  3778. movq 64(%rbx), %r11
  3779. movq 72(%rbx), %r12
  3780. movq 80(%rbx), %r8
  3781. # A[6] * B[0]
  3782. mulx (%rbp), %rax, %rcx
  3783. adcxq %rax, %r9
  3784. adoxq %rcx, %r10
  3785. # A[6] * B[1]
  3786. mulx 8(%rbp), %rax, %rcx
  3787. movq %r9, 48(%rbx)
  3788. adcxq %rax, %r10
  3789. adoxq %rcx, %r11
  3790. # A[6] * B[2]
  3791. mulx 16(%rbp), %rax, %rcx
  3792. movq %r10, 56(%rbx)
  3793. adcxq %rax, %r11
  3794. adoxq %rcx, %r12
  3795. # A[6] * B[3]
  3796. mulx 24(%rbp), %rax, %rcx
  3797. movq %r11, 64(%rbx)
  3798. adcxq %rax, %r12
  3799. adoxq %rcx, %r8
  3800. movq %r12, 72(%rbx)
  3801. movq 88(%rbx), %r9
  3802. movq 96(%rbx), %r10
  3803. movq 104(%rbx), %r11
  3804. movq 112(%rbx), %r12
  3805. # A[6] * B[4]
  3806. mulx 32(%rbp), %rax, %rcx
  3807. adcxq %rax, %r8
  3808. adoxq %rcx, %r9
  3809. # A[6] * B[5]
  3810. mulx 40(%rbp), %rax, %rcx
  3811. movq %r8, 80(%rbx)
  3812. adcxq %rax, %r9
  3813. adoxq %rcx, %r10
  3814. # A[6] * B[6]
  3815. mulx 48(%rbp), %rax, %rcx
  3816. movq %r9, 88(%rbx)
  3817. adcxq %rax, %r10
  3818. adoxq %rcx, %r11
  3819. # A[6] * B[7]
  3820. mulx 56(%rbp), %rax, %rcx
  3821. movq %r10, 96(%rbx)
  3822. adcxq %rax, %r11
  3823. adoxq %rcx, %r12
  3824. movq %r11, 104(%rbx)
  3825. movq 120(%rbx), %r8
  3826. movq 128(%rdi), %r9
  3827. movq 136(%rdi), %r10
  3828. movq 144(%rdi), %r11
  3829. # A[6] * B[8]
  3830. mulx 64(%rbp), %rax, %rcx
  3831. adcxq %rax, %r12
  3832. adoxq %rcx, %r8
  3833. # A[6] * B[9]
  3834. mulx 72(%rbp), %rax, %rcx
  3835. movq %r12, 112(%rbx)
  3836. adcxq %rax, %r8
  3837. adoxq %rcx, %r9
  3838. # A[6] * B[10]
  3839. mulx 80(%rbp), %rax, %rcx
  3840. movq %r8, 120(%rbx)
  3841. adcxq %rax, %r9
  3842. adoxq %rcx, %r10
  3843. # A[6] * B[11]
  3844. mulx 88(%rbp), %rax, %rcx
  3845. movq %r9, 128(%rdi)
  3846. adcxq %rax, %r10
  3847. adoxq %rcx, %r11
  3848. movq %r10, 136(%rdi)
  3849. movq 152(%rdi), %r12
  3850. movq 160(%rdi), %r8
  3851. movq 168(%rdi), %r9
  3852. # A[6] * B[12]
  3853. mulx 96(%rbp), %rax, %rcx
  3854. adcxq %rax, %r11
  3855. adoxq %rcx, %r12
  3856. # A[6] * B[13]
  3857. mulx 104(%rbp), %rax, %rcx
  3858. movq %r11, 144(%rdi)
  3859. adcxq %rax, %r12
  3860. adoxq %rcx, %r8
  3861. # A[6] * B[14]
  3862. mulx 112(%rbp), %rax, %rcx
  3863. movq %r12, 152(%rdi)
  3864. adcxq %rax, %r8
  3865. adoxq %rcx, %r9
  3866. # A[6] * B[15]
  3867. mulx 120(%rbp), %rax, %rcx
  3868. movq %r8, 160(%rdi)
  3869. movq %r14, %r10
  3870. adcxq %rax, %r9
  3871. adoxq %rcx, %r10
  3872. adcxq %r13, %r10
  3873. movq %r14, %r13
  3874. adoxq %r14, %r13
  3875. adcxq %r14, %r13
  3876. movq %r9, 168(%rdi)
  3877. movq %r10, 176(%rdi)
  3878. movq 56(%rsi), %rdx
  3879. movq 56(%rbx), %r10
  3880. movq 64(%rbx), %r11
  3881. movq 72(%rbx), %r12
  3882. movq 80(%rbx), %r8
  3883. movq 88(%rbx), %r9
  3884. # A[7] * B[0]
  3885. mulx (%rbp), %rax, %rcx
  3886. adcxq %rax, %r10
  3887. adoxq %rcx, %r11
  3888. # A[7] * B[1]
  3889. mulx 8(%rbp), %rax, %rcx
  3890. movq %r10, 56(%rbx)
  3891. adcxq %rax, %r11
  3892. adoxq %rcx, %r12
  3893. # A[7] * B[2]
  3894. mulx 16(%rbp), %rax, %rcx
  3895. movq %r11, 64(%rbx)
  3896. adcxq %rax, %r12
  3897. adoxq %rcx, %r8
  3898. # A[7] * B[3]
  3899. mulx 24(%rbp), %rax, %rcx
  3900. movq %r12, 72(%rbx)
  3901. adcxq %rax, %r8
  3902. adoxq %rcx, %r9
  3903. movq %r8, 80(%rbx)
  3904. movq 96(%rbx), %r10
  3905. movq 104(%rbx), %r11
  3906. movq 112(%rbx), %r12
  3907. movq 120(%rbx), %r8
  3908. # A[7] * B[4]
  3909. mulx 32(%rbp), %rax, %rcx
  3910. adcxq %rax, %r9
  3911. adoxq %rcx, %r10
  3912. # A[7] * B[5]
  3913. mulx 40(%rbp), %rax, %rcx
  3914. movq %r9, 88(%rbx)
  3915. adcxq %rax, %r10
  3916. adoxq %rcx, %r11
  3917. # A[7] * B[6]
  3918. mulx 48(%rbp), %rax, %rcx
  3919. movq %r10, 96(%rbx)
  3920. adcxq %rax, %r11
  3921. adoxq %rcx, %r12
  3922. # A[7] * B[7]
  3923. mulx 56(%rbp), %rax, %rcx
  3924. movq %r11, 104(%rbx)
  3925. adcxq %rax, %r12
  3926. adoxq %rcx, %r8
  3927. movq %r12, 112(%rbx)
  3928. movq 128(%rdi), %r9
  3929. movq 136(%rdi), %r10
  3930. movq 144(%rdi), %r11
  3931. movq 152(%rdi), %r12
  3932. # A[7] * B[8]
  3933. mulx 64(%rbp), %rax, %rcx
  3934. adcxq %rax, %r8
  3935. adoxq %rcx, %r9
  3936. # A[7] * B[9]
  3937. mulx 72(%rbp), %rax, %rcx
  3938. movq %r8, 120(%rbx)
  3939. adcxq %rax, %r9
  3940. adoxq %rcx, %r10
  3941. # A[7] * B[10]
  3942. mulx 80(%rbp), %rax, %rcx
  3943. movq %r9, 128(%rdi)
  3944. adcxq %rax, %r10
  3945. adoxq %rcx, %r11
  3946. # A[7] * B[11]
  3947. mulx 88(%rbp), %rax, %rcx
  3948. movq %r10, 136(%rdi)
  3949. adcxq %rax, %r11
  3950. adoxq %rcx, %r12
  3951. movq %r11, 144(%rdi)
  3952. movq 160(%rdi), %r8
  3953. movq 168(%rdi), %r9
  3954. movq 176(%rdi), %r10
  3955. # A[7] * B[12]
  3956. mulx 96(%rbp), %rax, %rcx
  3957. adcxq %rax, %r12
  3958. adoxq %rcx, %r8
  3959. # A[7] * B[13]
  3960. mulx 104(%rbp), %rax, %rcx
  3961. movq %r12, 152(%rdi)
  3962. adcxq %rax, %r8
  3963. adoxq %rcx, %r9
  3964. # A[7] * B[14]
  3965. mulx 112(%rbp), %rax, %rcx
  3966. movq %r8, 160(%rdi)
  3967. adcxq %rax, %r9
  3968. adoxq %rcx, %r10
  3969. # A[7] * B[15]
  3970. mulx 120(%rbp), %rax, %rcx
  3971. movq %r9, 168(%rdi)
  3972. movq %r14, %r11
  3973. adcxq %rax, %r10
  3974. adoxq %rcx, %r11
  3975. adcxq %r13, %r11
  3976. movq %r14, %r13
  3977. adoxq %r14, %r13
  3978. adcxq %r14, %r13
  3979. movq %r10, 176(%rdi)
  3980. movq %r11, 184(%rdi)
  3981. movq 64(%rsi), %rdx
  3982. movq 64(%rbx), %r11
  3983. movq 72(%rbx), %r12
  3984. movq 80(%rbx), %r8
  3985. movq 88(%rbx), %r9
  3986. movq 96(%rbx), %r10
  3987. # A[8] * B[0]
  3988. mulx (%rbp), %rax, %rcx
  3989. adcxq %rax, %r11
  3990. adoxq %rcx, %r12
  3991. # A[8] * B[1]
  3992. mulx 8(%rbp), %rax, %rcx
  3993. movq %r11, 64(%rbx)
  3994. adcxq %rax, %r12
  3995. adoxq %rcx, %r8
  3996. # A[8] * B[2]
  3997. mulx 16(%rbp), %rax, %rcx
  3998. movq %r12, 72(%rbx)
  3999. adcxq %rax, %r8
  4000. adoxq %rcx, %r9
  4001. # A[8] * B[3]
  4002. mulx 24(%rbp), %rax, %rcx
  4003. movq %r8, 80(%rbx)
  4004. adcxq %rax, %r9
  4005. adoxq %rcx, %r10
  4006. movq %r9, 88(%rbx)
  4007. movq 104(%rbx), %r11
  4008. movq 112(%rbx), %r12
  4009. movq 120(%rbx), %r8
  4010. movq 128(%rdi), %r9
  4011. # A[8] * B[4]
  4012. mulx 32(%rbp), %rax, %rcx
  4013. adcxq %rax, %r10
  4014. adoxq %rcx, %r11
  4015. # A[8] * B[5]
  4016. mulx 40(%rbp), %rax, %rcx
  4017. movq %r10, 96(%rbx)
  4018. adcxq %rax, %r11
  4019. adoxq %rcx, %r12
  4020. # A[8] * B[6]
  4021. mulx 48(%rbp), %rax, %rcx
  4022. movq %r11, 104(%rbx)
  4023. adcxq %rax, %r12
  4024. adoxq %rcx, %r8
  4025. # A[8] * B[7]
  4026. mulx 56(%rbp), %rax, %rcx
  4027. movq %r12, 112(%rbx)
  4028. adcxq %rax, %r8
  4029. adoxq %rcx, %r9
  4030. movq %r8, 120(%rbx)
  4031. movq 136(%rdi), %r10
  4032. movq 144(%rdi), %r11
  4033. movq 152(%rdi), %r12
  4034. movq 160(%rdi), %r8
  4035. # A[8] * B[8]
  4036. mulx 64(%rbp), %rax, %rcx
  4037. adcxq %rax, %r9
  4038. adoxq %rcx, %r10
  4039. # A[8] * B[9]
  4040. mulx 72(%rbp), %rax, %rcx
  4041. movq %r9, 128(%rdi)
  4042. adcxq %rax, %r10
  4043. adoxq %rcx, %r11
  4044. # A[8] * B[10]
  4045. mulx 80(%rbp), %rax, %rcx
  4046. movq %r10, 136(%rdi)
  4047. adcxq %rax, %r11
  4048. adoxq %rcx, %r12
  4049. # A[8] * B[11]
  4050. mulx 88(%rbp), %rax, %rcx
  4051. movq %r11, 144(%rdi)
  4052. adcxq %rax, %r12
  4053. adoxq %rcx, %r8
  4054. movq %r12, 152(%rdi)
  4055. movq 168(%rdi), %r9
  4056. movq 176(%rdi), %r10
  4057. movq 184(%rdi), %r11
  4058. # A[8] * B[12]
  4059. mulx 96(%rbp), %rax, %rcx
  4060. adcxq %rax, %r8
  4061. adoxq %rcx, %r9
  4062. # A[8] * B[13]
  4063. mulx 104(%rbp), %rax, %rcx
  4064. movq %r8, 160(%rdi)
  4065. adcxq %rax, %r9
  4066. adoxq %rcx, %r10
  4067. # A[8] * B[14]
  4068. mulx 112(%rbp), %rax, %rcx
  4069. movq %r9, 168(%rdi)
  4070. adcxq %rax, %r10
  4071. adoxq %rcx, %r11
  4072. # A[8] * B[15]
  4073. mulx 120(%rbp), %rax, %rcx
  4074. movq %r10, 176(%rdi)
  4075. movq %r14, %r12
  4076. adcxq %rax, %r11
  4077. adoxq %rcx, %r12
  4078. adcxq %r13, %r12
  4079. movq %r14, %r13
  4080. adoxq %r14, %r13
  4081. adcxq %r14, %r13
  4082. movq %r11, 184(%rdi)
  4083. movq %r12, 192(%rdi)
  4084. movq 72(%rsi), %rdx
  4085. movq 72(%rbx), %r12
  4086. movq 80(%rbx), %r8
  4087. movq 88(%rbx), %r9
  4088. movq 96(%rbx), %r10
  4089. movq 104(%rbx), %r11
  4090. # A[9] * B[0]
  4091. mulx (%rbp), %rax, %rcx
  4092. adcxq %rax, %r12
  4093. adoxq %rcx, %r8
  4094. # A[9] * B[1]
  4095. mulx 8(%rbp), %rax, %rcx
  4096. movq %r12, 72(%rbx)
  4097. adcxq %rax, %r8
  4098. adoxq %rcx, %r9
  4099. # A[9] * B[2]
  4100. mulx 16(%rbp), %rax, %rcx
  4101. movq %r8, 80(%rbx)
  4102. adcxq %rax, %r9
  4103. adoxq %rcx, %r10
  4104. # A[9] * B[3]
  4105. mulx 24(%rbp), %rax, %rcx
  4106. movq %r9, 88(%rbx)
  4107. adcxq %rax, %r10
  4108. adoxq %rcx, %r11
  4109. movq %r10, 96(%rbx)
  4110. movq 112(%rbx), %r12
  4111. movq 120(%rbx), %r8
  4112. movq 128(%rdi), %r9
  4113. movq 136(%rdi), %r10
  4114. # A[9] * B[4]
  4115. mulx 32(%rbp), %rax, %rcx
  4116. adcxq %rax, %r11
  4117. adoxq %rcx, %r12
  4118. # A[9] * B[5]
  4119. mulx 40(%rbp), %rax, %rcx
  4120. movq %r11, 104(%rbx)
  4121. adcxq %rax, %r12
  4122. adoxq %rcx, %r8
  4123. # A[9] * B[6]
  4124. mulx 48(%rbp), %rax, %rcx
  4125. movq %r12, 112(%rbx)
  4126. adcxq %rax, %r8
  4127. adoxq %rcx, %r9
  4128. # A[9] * B[7]
  4129. mulx 56(%rbp), %rax, %rcx
  4130. movq %r8, 120(%rbx)
  4131. adcxq %rax, %r9
  4132. adoxq %rcx, %r10
  4133. movq %r9, 128(%rdi)
  4134. movq 144(%rdi), %r11
  4135. movq 152(%rdi), %r12
  4136. movq 160(%rdi), %r8
  4137. movq 168(%rdi), %r9
  4138. # A[9] * B[8]
  4139. mulx 64(%rbp), %rax, %rcx
  4140. adcxq %rax, %r10
  4141. adoxq %rcx, %r11
  4142. # A[9] * B[9]
  4143. mulx 72(%rbp), %rax, %rcx
  4144. movq %r10, 136(%rdi)
  4145. adcxq %rax, %r11
  4146. adoxq %rcx, %r12
  4147. # A[9] * B[10]
  4148. mulx 80(%rbp), %rax, %rcx
  4149. movq %r11, 144(%rdi)
  4150. adcxq %rax, %r12
  4151. adoxq %rcx, %r8
  4152. # A[9] * B[11]
  4153. mulx 88(%rbp), %rax, %rcx
  4154. movq %r12, 152(%rdi)
  4155. adcxq %rax, %r8
  4156. adoxq %rcx, %r9
  4157. movq %r8, 160(%rdi)
  4158. movq 176(%rdi), %r10
  4159. movq 184(%rdi), %r11
  4160. movq 192(%rdi), %r12
  4161. # A[9] * B[12]
  4162. mulx 96(%rbp), %rax, %rcx
  4163. adcxq %rax, %r9
  4164. adoxq %rcx, %r10
  4165. # A[9] * B[13]
  4166. mulx 104(%rbp), %rax, %rcx
  4167. movq %r9, 168(%rdi)
  4168. adcxq %rax, %r10
  4169. adoxq %rcx, %r11
  4170. # A[9] * B[14]
  4171. mulx 112(%rbp), %rax, %rcx
  4172. movq %r10, 176(%rdi)
  4173. adcxq %rax, %r11
  4174. adoxq %rcx, %r12
  4175. # A[9] * B[15]
  4176. mulx 120(%rbp), %rax, %rcx
  4177. movq %r11, 184(%rdi)
  4178. movq %r14, %r8
  4179. adcxq %rax, %r12
  4180. adoxq %rcx, %r8
  4181. adcxq %r13, %r8
  4182. movq %r14, %r13
  4183. adoxq %r14, %r13
  4184. adcxq %r14, %r13
  4185. movq %r12, 192(%rdi)
  4186. movq %r8, 200(%rdi)
  4187. movq 80(%rsi), %rdx
  4188. movq 80(%rbx), %r8
  4189. movq 88(%rbx), %r9
  4190. movq 96(%rbx), %r10
  4191. movq 104(%rbx), %r11
  4192. movq 112(%rbx), %r12
  4193. # A[10] * B[0]
  4194. mulx (%rbp), %rax, %rcx
  4195. adcxq %rax, %r8
  4196. adoxq %rcx, %r9
  4197. # A[10] * B[1]
  4198. mulx 8(%rbp), %rax, %rcx
  4199. movq %r8, 80(%rbx)
  4200. adcxq %rax, %r9
  4201. adoxq %rcx, %r10
  4202. # A[10] * B[2]
  4203. mulx 16(%rbp), %rax, %rcx
  4204. movq %r9, 88(%rbx)
  4205. adcxq %rax, %r10
  4206. adoxq %rcx, %r11
  4207. # A[10] * B[3]
  4208. mulx 24(%rbp), %rax, %rcx
  4209. movq %r10, 96(%rbx)
  4210. adcxq %rax, %r11
  4211. adoxq %rcx, %r12
  4212. movq %r11, 104(%rbx)
  4213. movq 120(%rbx), %r8
  4214. movq 128(%rdi), %r9
  4215. movq 136(%rdi), %r10
  4216. movq 144(%rdi), %r11
  4217. # A[10] * B[4]
  4218. mulx 32(%rbp), %rax, %rcx
  4219. adcxq %rax, %r12
  4220. adoxq %rcx, %r8
  4221. # A[10] * B[5]
  4222. mulx 40(%rbp), %rax, %rcx
  4223. movq %r12, 112(%rbx)
  4224. adcxq %rax, %r8
  4225. adoxq %rcx, %r9
  4226. # A[10] * B[6]
  4227. mulx 48(%rbp), %rax, %rcx
  4228. movq %r8, 120(%rbx)
  4229. adcxq %rax, %r9
  4230. adoxq %rcx, %r10
  4231. # A[10] * B[7]
  4232. mulx 56(%rbp), %rax, %rcx
  4233. movq %r9, 128(%rdi)
  4234. adcxq %rax, %r10
  4235. adoxq %rcx, %r11
  4236. movq %r10, 136(%rdi)
  4237. movq 152(%rdi), %r12
  4238. movq 160(%rdi), %r8
  4239. movq 168(%rdi), %r9
  4240. movq 176(%rdi), %r10
  4241. # A[10] * B[8]
  4242. mulx 64(%rbp), %rax, %rcx
  4243. adcxq %rax, %r11
  4244. adoxq %rcx, %r12
  4245. # A[10] * B[9]
  4246. mulx 72(%rbp), %rax, %rcx
  4247. movq %r11, 144(%rdi)
  4248. adcxq %rax, %r12
  4249. adoxq %rcx, %r8
  4250. # A[10] * B[10]
  4251. mulx 80(%rbp), %rax, %rcx
  4252. movq %r12, 152(%rdi)
  4253. adcxq %rax, %r8
  4254. adoxq %rcx, %r9
  4255. # A[10] * B[11]
  4256. mulx 88(%rbp), %rax, %rcx
  4257. movq %r8, 160(%rdi)
  4258. adcxq %rax, %r9
  4259. adoxq %rcx, %r10
  4260. movq %r9, 168(%rdi)
  4261. movq 184(%rdi), %r11
  4262. movq 192(%rdi), %r12
  4263. movq 200(%rdi), %r8
  4264. # A[10] * B[12]
  4265. mulx 96(%rbp), %rax, %rcx
  4266. adcxq %rax, %r10
  4267. adoxq %rcx, %r11
  4268. # A[10] * B[13]
  4269. mulx 104(%rbp), %rax, %rcx
  4270. movq %r10, 176(%rdi)
  4271. adcxq %rax, %r11
  4272. adoxq %rcx, %r12
  4273. # A[10] * B[14]
  4274. mulx 112(%rbp), %rax, %rcx
  4275. movq %r11, 184(%rdi)
  4276. adcxq %rax, %r12
  4277. adoxq %rcx, %r8
  4278. # A[10] * B[15]
  4279. mulx 120(%rbp), %rax, %rcx
  4280. movq %r12, 192(%rdi)
  4281. movq %r14, %r9
  4282. adcxq %rax, %r8
  4283. adoxq %rcx, %r9
  4284. adcxq %r13, %r9
  4285. movq %r14, %r13
  4286. adoxq %r14, %r13
  4287. adcxq %r14, %r13
  4288. movq %r8, 200(%rdi)
  4289. movq %r9, 208(%rdi)
  4290. movq 88(%rsi), %rdx
  4291. movq 88(%rbx), %r9
  4292. movq 96(%rbx), %r10
  4293. movq 104(%rbx), %r11
  4294. movq 112(%rbx), %r12
  4295. movq 120(%rbx), %r8
  4296. # A[11] * B[0]
  4297. mulx (%rbp), %rax, %rcx
  4298. adcxq %rax, %r9
  4299. adoxq %rcx, %r10
  4300. # A[11] * B[1]
  4301. mulx 8(%rbp), %rax, %rcx
  4302. movq %r9, 88(%rbx)
  4303. adcxq %rax, %r10
  4304. adoxq %rcx, %r11
  4305. # A[11] * B[2]
  4306. mulx 16(%rbp), %rax, %rcx
  4307. movq %r10, 96(%rbx)
  4308. adcxq %rax, %r11
  4309. adoxq %rcx, %r12
  4310. # A[11] * B[3]
  4311. mulx 24(%rbp), %rax, %rcx
  4312. movq %r11, 104(%rbx)
  4313. adcxq %rax, %r12
  4314. adoxq %rcx, %r8
  4315. movq %r12, 112(%rbx)
  4316. movq 128(%rdi), %r9
  4317. movq 136(%rdi), %r10
  4318. movq 144(%rdi), %r11
  4319. movq 152(%rdi), %r12
  4320. # A[11] * B[4]
  4321. mulx 32(%rbp), %rax, %rcx
  4322. adcxq %rax, %r8
  4323. adoxq %rcx, %r9
  4324. # A[11] * B[5]
  4325. mulx 40(%rbp), %rax, %rcx
  4326. movq %r8, 120(%rbx)
  4327. adcxq %rax, %r9
  4328. adoxq %rcx, %r10
  4329. # A[11] * B[6]
  4330. mulx 48(%rbp), %rax, %rcx
  4331. movq %r9, 128(%rdi)
  4332. adcxq %rax, %r10
  4333. adoxq %rcx, %r11
  4334. # A[11] * B[7]
  4335. mulx 56(%rbp), %rax, %rcx
  4336. movq %r10, 136(%rdi)
  4337. adcxq %rax, %r11
  4338. adoxq %rcx, %r12
  4339. movq %r11, 144(%rdi)
  4340. movq 160(%rdi), %r8
  4341. movq 168(%rdi), %r9
  4342. movq 176(%rdi), %r10
  4343. movq 184(%rdi), %r11
  4344. # A[11] * B[8]
  4345. mulx 64(%rbp), %rax, %rcx
  4346. adcxq %rax, %r12
  4347. adoxq %rcx, %r8
  4348. # A[11] * B[9]
  4349. mulx 72(%rbp), %rax, %rcx
  4350. movq %r12, 152(%rdi)
  4351. adcxq %rax, %r8
  4352. adoxq %rcx, %r9
  4353. # A[11] * B[10]
  4354. mulx 80(%rbp), %rax, %rcx
  4355. movq %r8, 160(%rdi)
  4356. adcxq %rax, %r9
  4357. adoxq %rcx, %r10
  4358. # A[11] * B[11]
  4359. mulx 88(%rbp), %rax, %rcx
  4360. movq %r9, 168(%rdi)
  4361. adcxq %rax, %r10
  4362. adoxq %rcx, %r11
  4363. movq %r10, 176(%rdi)
  4364. movq 192(%rdi), %r12
  4365. movq 200(%rdi), %r8
  4366. movq 208(%rdi), %r9
  4367. # A[11] * B[12]
  4368. mulx 96(%rbp), %rax, %rcx
  4369. adcxq %rax, %r11
  4370. adoxq %rcx, %r12
  4371. # A[11] * B[13]
  4372. mulx 104(%rbp), %rax, %rcx
  4373. movq %r11, 184(%rdi)
  4374. adcxq %rax, %r12
  4375. adoxq %rcx, %r8
  4376. # A[11] * B[14]
  4377. mulx 112(%rbp), %rax, %rcx
  4378. movq %r12, 192(%rdi)
  4379. adcxq %rax, %r8
  4380. adoxq %rcx, %r9
  4381. # A[11] * B[15]
  4382. mulx 120(%rbp), %rax, %rcx
  4383. movq %r8, 200(%rdi)
  4384. movq %r14, %r10
  4385. adcxq %rax, %r9
  4386. adoxq %rcx, %r10
  4387. adcxq %r13, %r10
  4388. movq %r14, %r13
  4389. adoxq %r14, %r13
  4390. adcxq %r14, %r13
  4391. movq %r9, 208(%rdi)
  4392. movq %r10, 216(%rdi)
  4393. movq 96(%rsi), %rdx
  4394. movq 96(%rbx), %r10
  4395. movq 104(%rbx), %r11
  4396. movq 112(%rbx), %r12
  4397. movq 120(%rbx), %r8
  4398. movq 128(%rdi), %r9
  4399. # A[12] * B[0]
  4400. mulx (%rbp), %rax, %rcx
  4401. adcxq %rax, %r10
  4402. adoxq %rcx, %r11
  4403. # A[12] * B[1]
  4404. mulx 8(%rbp), %rax, %rcx
  4405. movq %r10, 96(%rbx)
  4406. adcxq %rax, %r11
  4407. adoxq %rcx, %r12
  4408. # A[12] * B[2]
  4409. mulx 16(%rbp), %rax, %rcx
  4410. movq %r11, 104(%rbx)
  4411. adcxq %rax, %r12
  4412. adoxq %rcx, %r8
  4413. # A[12] * B[3]
  4414. mulx 24(%rbp), %rax, %rcx
  4415. movq %r12, 112(%rbx)
  4416. adcxq %rax, %r8
  4417. adoxq %rcx, %r9
  4418. movq %r8, 120(%rbx)
  4419. movq 136(%rdi), %r10
  4420. movq 144(%rdi), %r11
  4421. movq 152(%rdi), %r12
  4422. movq 160(%rdi), %r8
  4423. # A[12] * B[4]
  4424. mulx 32(%rbp), %rax, %rcx
  4425. adcxq %rax, %r9
  4426. adoxq %rcx, %r10
  4427. # A[12] * B[5]
  4428. mulx 40(%rbp), %rax, %rcx
  4429. movq %r9, 128(%rdi)
  4430. adcxq %rax, %r10
  4431. adoxq %rcx, %r11
  4432. # A[12] * B[6]
  4433. mulx 48(%rbp), %rax, %rcx
  4434. movq %r10, 136(%rdi)
  4435. adcxq %rax, %r11
  4436. adoxq %rcx, %r12
  4437. # A[12] * B[7]
  4438. mulx 56(%rbp), %rax, %rcx
  4439. movq %r11, 144(%rdi)
  4440. adcxq %rax, %r12
  4441. adoxq %rcx, %r8
  4442. movq %r12, 152(%rdi)
  4443. movq 168(%rdi), %r9
  4444. movq 176(%rdi), %r10
  4445. movq 184(%rdi), %r11
  4446. movq 192(%rdi), %r12
  4447. # A[12] * B[8]
  4448. mulx 64(%rbp), %rax, %rcx
  4449. adcxq %rax, %r8
  4450. adoxq %rcx, %r9
  4451. # A[12] * B[9]
  4452. mulx 72(%rbp), %rax, %rcx
  4453. movq %r8, 160(%rdi)
  4454. adcxq %rax, %r9
  4455. adoxq %rcx, %r10
  4456. # A[12] * B[10]
  4457. mulx 80(%rbp), %rax, %rcx
  4458. movq %r9, 168(%rdi)
  4459. adcxq %rax, %r10
  4460. adoxq %rcx, %r11
  4461. # A[12] * B[11]
  4462. mulx 88(%rbp), %rax, %rcx
  4463. movq %r10, 176(%rdi)
  4464. adcxq %rax, %r11
  4465. adoxq %rcx, %r12
  4466. movq %r11, 184(%rdi)
  4467. movq 200(%rdi), %r8
  4468. movq 208(%rdi), %r9
  4469. movq 216(%rdi), %r10
  4470. # A[12] * B[12]
  4471. mulx 96(%rbp), %rax, %rcx
  4472. adcxq %rax, %r12
  4473. adoxq %rcx, %r8
  4474. # A[12] * B[13]
  4475. mulx 104(%rbp), %rax, %rcx
  4476. movq %r12, 192(%rdi)
  4477. adcxq %rax, %r8
  4478. adoxq %rcx, %r9
  4479. # A[12] * B[14]
  4480. mulx 112(%rbp), %rax, %rcx
  4481. movq %r8, 200(%rdi)
  4482. adcxq %rax, %r9
  4483. adoxq %rcx, %r10
  4484. # A[12] * B[15]
  4485. mulx 120(%rbp), %rax, %rcx
  4486. movq %r9, 208(%rdi)
  4487. movq %r14, %r11
  4488. adcxq %rax, %r10
  4489. adoxq %rcx, %r11
  4490. adcxq %r13, %r11
  4491. movq %r14, %r13
  4492. adoxq %r14, %r13
  4493. adcxq %r14, %r13
  4494. movq %r10, 216(%rdi)
  4495. movq %r11, 224(%rdi)
  4496. movq 104(%rsi), %rdx
  4497. movq 104(%rbx), %r11
  4498. movq 112(%rbx), %r12
  4499. movq 120(%rbx), %r8
  4500. movq 128(%rdi), %r9
  4501. movq 136(%rdi), %r10
  4502. # A[13] * B[0]
  4503. mulx (%rbp), %rax, %rcx
  4504. adcxq %rax, %r11
  4505. adoxq %rcx, %r12
  4506. # A[13] * B[1]
  4507. mulx 8(%rbp), %rax, %rcx
  4508. movq %r11, 104(%rbx)
  4509. adcxq %rax, %r12
  4510. adoxq %rcx, %r8
  4511. # A[13] * B[2]
  4512. mulx 16(%rbp), %rax, %rcx
  4513. movq %r12, 112(%rbx)
  4514. adcxq %rax, %r8
  4515. adoxq %rcx, %r9
  4516. # A[13] * B[3]
  4517. mulx 24(%rbp), %rax, %rcx
  4518. movq %r8, 120(%rbx)
  4519. adcxq %rax, %r9
  4520. adoxq %rcx, %r10
  4521. movq %r9, 128(%rdi)
  4522. movq 144(%rdi), %r11
  4523. movq 152(%rdi), %r12
  4524. movq 160(%rdi), %r8
  4525. movq 168(%rdi), %r9
  4526. # A[13] * B[4]
  4527. mulx 32(%rbp), %rax, %rcx
  4528. adcxq %rax, %r10
  4529. adoxq %rcx, %r11
  4530. # A[13] * B[5]
  4531. mulx 40(%rbp), %rax, %rcx
  4532. movq %r10, 136(%rdi)
  4533. adcxq %rax, %r11
  4534. adoxq %rcx, %r12
  4535. # A[13] * B[6]
  4536. mulx 48(%rbp), %rax, %rcx
  4537. movq %r11, 144(%rdi)
  4538. adcxq %rax, %r12
  4539. adoxq %rcx, %r8
  4540. # A[13] * B[7]
  4541. mulx 56(%rbp), %rax, %rcx
  4542. movq %r12, 152(%rdi)
  4543. adcxq %rax, %r8
  4544. adoxq %rcx, %r9
  4545. movq %r8, 160(%rdi)
  4546. movq 176(%rdi), %r10
  4547. movq 184(%rdi), %r11
  4548. movq 192(%rdi), %r12
  4549. movq 200(%rdi), %r8
  4550. # A[13] * B[8]
  4551. mulx 64(%rbp), %rax, %rcx
  4552. adcxq %rax, %r9
  4553. adoxq %rcx, %r10
  4554. # A[13] * B[9]
  4555. mulx 72(%rbp), %rax, %rcx
  4556. movq %r9, 168(%rdi)
  4557. adcxq %rax, %r10
  4558. adoxq %rcx, %r11
  4559. # A[13] * B[10]
  4560. mulx 80(%rbp), %rax, %rcx
  4561. movq %r10, 176(%rdi)
  4562. adcxq %rax, %r11
  4563. adoxq %rcx, %r12
  4564. # A[13] * B[11]
  4565. mulx 88(%rbp), %rax, %rcx
  4566. movq %r11, 184(%rdi)
  4567. adcxq %rax, %r12
  4568. adoxq %rcx, %r8
  4569. movq %r12, 192(%rdi)
  4570. movq 208(%rdi), %r9
  4571. movq 216(%rdi), %r10
  4572. movq 224(%rdi), %r11
  4573. # A[13] * B[12]
  4574. mulx 96(%rbp), %rax, %rcx
  4575. adcxq %rax, %r8
  4576. adoxq %rcx, %r9
  4577. # A[13] * B[13]
  4578. mulx 104(%rbp), %rax, %rcx
  4579. movq %r8, 200(%rdi)
  4580. adcxq %rax, %r9
  4581. adoxq %rcx, %r10
  4582. # A[13] * B[14]
  4583. mulx 112(%rbp), %rax, %rcx
  4584. movq %r9, 208(%rdi)
  4585. adcxq %rax, %r10
  4586. adoxq %rcx, %r11
  4587. # A[13] * B[15]
  4588. mulx 120(%rbp), %rax, %rcx
  4589. movq %r10, 216(%rdi)
  4590. movq %r14, %r12
  4591. adcxq %rax, %r11
  4592. adoxq %rcx, %r12
  4593. adcxq %r13, %r12
  4594. movq %r14, %r13
  4595. adoxq %r14, %r13
  4596. adcxq %r14, %r13
  4597. movq %r11, 224(%rdi)
  4598. movq %r12, 232(%rdi)
  4599. movq 112(%rsi), %rdx
  4600. movq 112(%rbx), %r12
  4601. movq 120(%rbx), %r8
  4602. movq 128(%rdi), %r9
  4603. movq 136(%rdi), %r10
  4604. movq 144(%rdi), %r11
  4605. # A[14] * B[0]
  4606. mulx (%rbp), %rax, %rcx
  4607. adcxq %rax, %r12
  4608. adoxq %rcx, %r8
  4609. # A[14] * B[1]
  4610. mulx 8(%rbp), %rax, %rcx
  4611. movq %r12, 112(%rbx)
  4612. adcxq %rax, %r8
  4613. adoxq %rcx, %r9
  4614. # A[14] * B[2]
  4615. mulx 16(%rbp), %rax, %rcx
  4616. movq %r8, 120(%rbx)
  4617. adcxq %rax, %r9
  4618. adoxq %rcx, %r10
  4619. # A[14] * B[3]
  4620. mulx 24(%rbp), %rax, %rcx
  4621. movq %r9, 128(%rdi)
  4622. adcxq %rax, %r10
  4623. adoxq %rcx, %r11
  4624. movq %r10, 136(%rdi)
  4625. movq 152(%rdi), %r12
  4626. movq 160(%rdi), %r8
  4627. movq 168(%rdi), %r9
  4628. movq 176(%rdi), %r10
  4629. # A[14] * B[4]
  4630. mulx 32(%rbp), %rax, %rcx
  4631. adcxq %rax, %r11
  4632. adoxq %rcx, %r12
  4633. # A[14] * B[5]
  4634. mulx 40(%rbp), %rax, %rcx
  4635. movq %r11, 144(%rdi)
  4636. adcxq %rax, %r12
  4637. adoxq %rcx, %r8
  4638. # A[14] * B[6]
  4639. mulx 48(%rbp), %rax, %rcx
  4640. movq %r12, 152(%rdi)
  4641. adcxq %rax, %r8
  4642. adoxq %rcx, %r9
  4643. # A[14] * B[7]
  4644. mulx 56(%rbp), %rax, %rcx
  4645. movq %r8, 160(%rdi)
  4646. adcxq %rax, %r9
  4647. adoxq %rcx, %r10
  4648. movq %r9, 168(%rdi)
  4649. movq 184(%rdi), %r11
  4650. movq 192(%rdi), %r12
  4651. movq 200(%rdi), %r8
  4652. movq 208(%rdi), %r9
  4653. # A[14] * B[8]
  4654. mulx 64(%rbp), %rax, %rcx
  4655. adcxq %rax, %r10
  4656. adoxq %rcx, %r11
  4657. # A[14] * B[9]
  4658. mulx 72(%rbp), %rax, %rcx
  4659. movq %r10, 176(%rdi)
  4660. adcxq %rax, %r11
  4661. adoxq %rcx, %r12
  4662. # A[14] * B[10]
  4663. mulx 80(%rbp), %rax, %rcx
  4664. movq %r11, 184(%rdi)
  4665. adcxq %rax, %r12
  4666. adoxq %rcx, %r8
  4667. # A[14] * B[11]
  4668. mulx 88(%rbp), %rax, %rcx
  4669. movq %r12, 192(%rdi)
  4670. adcxq %rax, %r8
  4671. adoxq %rcx, %r9
  4672. movq %r8, 200(%rdi)
  4673. movq 216(%rdi), %r10
  4674. movq 224(%rdi), %r11
  4675. movq 232(%rdi), %r12
  4676. # A[14] * B[12]
  4677. mulx 96(%rbp), %rax, %rcx
  4678. adcxq %rax, %r9
  4679. adoxq %rcx, %r10
  4680. # A[14] * B[13]
  4681. mulx 104(%rbp), %rax, %rcx
  4682. movq %r9, 208(%rdi)
  4683. adcxq %rax, %r10
  4684. adoxq %rcx, %r11
  4685. # A[14] * B[14]
  4686. mulx 112(%rbp), %rax, %rcx
  4687. movq %r10, 216(%rdi)
  4688. adcxq %rax, %r11
  4689. adoxq %rcx, %r12
  4690. # A[14] * B[15]
  4691. mulx 120(%rbp), %rax, %rcx
  4692. movq %r11, 224(%rdi)
  4693. movq %r14, %r8
  4694. adcxq %rax, %r12
  4695. adoxq %rcx, %r8
  4696. adcxq %r13, %r8
  4697. movq %r14, %r13
  4698. adoxq %r14, %r13
  4699. adcxq %r14, %r13
  4700. movq %r12, 232(%rdi)
  4701. movq %r8, 240(%rdi)
  4702. movq 120(%rsi), %rdx
  4703. movq 120(%rbx), %r8
  4704. movq 128(%rdi), %r9
  4705. movq 136(%rdi), %r10
  4706. movq 144(%rdi), %r11
  4707. movq 152(%rdi), %r12
  4708. # A[15] * B[0]
  4709. mulx (%rbp), %rax, %rcx
  4710. adcxq %rax, %r8
  4711. adoxq %rcx, %r9
  4712. # A[15] * B[1]
  4713. mulx 8(%rbp), %rax, %rcx
  4714. movq %r8, 120(%rbx)
  4715. adcxq %rax, %r9
  4716. adoxq %rcx, %r10
  4717. # A[15] * B[2]
  4718. mulx 16(%rbp), %rax, %rcx
  4719. movq %r9, 128(%rdi)
  4720. adcxq %rax, %r10
  4721. adoxq %rcx, %r11
  4722. # A[15] * B[3]
  4723. mulx 24(%rbp), %rax, %rcx
  4724. movq %r10, 136(%rdi)
  4725. adcxq %rax, %r11
  4726. adoxq %rcx, %r12
  4727. movq %r11, 144(%rdi)
  4728. movq 160(%rdi), %r8
  4729. movq 168(%rdi), %r9
  4730. movq 176(%rdi), %r10
  4731. movq 184(%rdi), %r11
  4732. # A[15] * B[4]
  4733. mulx 32(%rbp), %rax, %rcx
  4734. adcxq %rax, %r12
  4735. adoxq %rcx, %r8
  4736. # A[15] * B[5]
  4737. mulx 40(%rbp), %rax, %rcx
  4738. movq %r12, 152(%rdi)
  4739. adcxq %rax, %r8
  4740. adoxq %rcx, %r9
  4741. # A[15] * B[6]
  4742. mulx 48(%rbp), %rax, %rcx
  4743. movq %r8, 160(%rdi)
  4744. adcxq %rax, %r9
  4745. adoxq %rcx, %r10
  4746. # A[15] * B[7]
  4747. mulx 56(%rbp), %rax, %rcx
  4748. movq %r9, 168(%rdi)
  4749. adcxq %rax, %r10
  4750. adoxq %rcx, %r11
  4751. movq %r10, 176(%rdi)
  4752. movq 192(%rdi), %r12
  4753. movq 200(%rdi), %r8
  4754. movq 208(%rdi), %r9
  4755. movq 216(%rdi), %r10
  4756. # A[15] * B[8]
  4757. mulx 64(%rbp), %rax, %rcx
  4758. adcxq %rax, %r11
  4759. adoxq %rcx, %r12
  4760. # A[15] * B[9]
  4761. mulx 72(%rbp), %rax, %rcx
  4762. movq %r11, 184(%rdi)
  4763. adcxq %rax, %r12
  4764. adoxq %rcx, %r8
  4765. # A[15] * B[10]
  4766. mulx 80(%rbp), %rax, %rcx
  4767. movq %r12, 192(%rdi)
  4768. adcxq %rax, %r8
  4769. adoxq %rcx, %r9
  4770. # A[15] * B[11]
  4771. mulx 88(%rbp), %rax, %rcx
  4772. movq %r8, 200(%rdi)
  4773. adcxq %rax, %r9
  4774. adoxq %rcx, %r10
  4775. movq %r9, 208(%rdi)
  4776. movq 224(%rdi), %r11
  4777. movq 232(%rdi), %r12
  4778. movq 240(%rdi), %r8
  4779. # A[15] * B[12]
  4780. mulx 96(%rbp), %rax, %rcx
  4781. adcxq %rax, %r10
  4782. adoxq %rcx, %r11
  4783. # A[15] * B[13]
  4784. mulx 104(%rbp), %rax, %rcx
  4785. movq %r10, 216(%rdi)
  4786. adcxq %rax, %r11
  4787. adoxq %rcx, %r12
  4788. # A[15] * B[14]
  4789. mulx 112(%rbp), %rax, %rcx
  4790. movq %r11, 224(%rdi)
  4791. adcxq %rax, %r12
  4792. adoxq %rcx, %r8
  4793. # A[15] * B[15]
  4794. mulx 120(%rbp), %rax, %rcx
  4795. movq %r12, 232(%rdi)
  4796. movq %r14, %r9
  4797. adcxq %rax, %r8
  4798. adoxq %rcx, %r9
  4799. adcxq %r13, %r9
  4800. movq %r8, 240(%rdi)
  4801. movq %r9, 248(%rdi)
  4802. cmpq %rdi, %rsi
  4803. je L_start_2048_mul_avx2_16
  4804. cmpq %rdi, %rbp
  4805. jne L_end_2048_mul_avx2_16
  4806. L_start_2048_mul_avx2_16:
  4807. vmovdqu (%rbx), %xmm0
  4808. vmovups %xmm0, (%rdi)
  4809. vmovdqu 16(%rbx), %xmm0
  4810. vmovups %xmm0, 16(%rdi)
  4811. vmovdqu 32(%rbx), %xmm0
  4812. vmovups %xmm0, 32(%rdi)
  4813. vmovdqu 48(%rbx), %xmm0
  4814. vmovups %xmm0, 48(%rdi)
  4815. vmovdqu 64(%rbx), %xmm0
  4816. vmovups %xmm0, 64(%rdi)
  4817. vmovdqu 80(%rbx), %xmm0
  4818. vmovups %xmm0, 80(%rdi)
  4819. vmovdqu 96(%rbx), %xmm0
  4820. vmovups %xmm0, 96(%rdi)
  4821. vmovdqu 112(%rbx), %xmm0
  4822. vmovups %xmm0, 112(%rdi)
  4823. L_end_2048_mul_avx2_16:
  4824. addq $0x80, %rsp
  4825. popq %r14
  4826. popq %r13
  4827. popq %r12
  4828. popq %rbp
  4829. popq %rbx
  4830. repz retq
  4831. #ifndef __APPLE__
  4832. .size sp_2048_mul_avx2_16,.-sp_2048_mul_avx2_16
  4833. #endif /* __APPLE__ */
  4834. #endif /* HAVE_INTEL_AVX2 */
  4835. #ifdef HAVE_INTEL_AVX2
  4836. /* Square a and put the result in r. (r = a * a)
  4837. *
  4838. * r A single precision integer that receives the result of the squaring.
  4839. * a A single precision integer to be squared.
  4840. */
  4841. #ifndef __APPLE__
  4842. .text
  4843. .globl sp_2048_sqr_avx2_16
  4844. .type sp_2048_sqr_avx2_16,@function
  4845. .align 16
  4846. sp_2048_sqr_avx2_16:
  4847. #else
  4848. .section __TEXT,__text
  4849. .globl _sp_2048_sqr_avx2_16
  4850. .p2align 4
  4851. _sp_2048_sqr_avx2_16:
  4852. #endif /* __APPLE__ */
  4853. pushq %rbp
  4854. pushq %r12
  4855. pushq %r13
  4856. pushq %r14
  4857. pushq %r15
  4858. pushq %rbx
  4859. subq $0x80, %rsp
  4860. cmpq %rdi, %rsi
  4861. movq %rsp, %rbp
  4862. cmovne %rdi, %rbp
  4863. xorq %r11, %r11
  4864. # Diagonal 1
  4865. xorq %r10, %r10
  4866. # A[1] x A[0]
  4867. movq (%rsi), %rdx
  4868. mulxq 8(%rsi), %r8, %r9
  4869. # A[2] x A[0]
  4870. mulxq 16(%rsi), %rax, %rcx
  4871. adcxq %rax, %r9
  4872. adoxq %rcx, %r10
  4873. movq %r8, 8(%rbp)
  4874. movq %r9, 16(%rbp)
  4875. movq %r11, %r8
  4876. movq %r11, %r9
  4877. # A[3] x A[0]
  4878. mulxq 24(%rsi), %rax, %rcx
  4879. adcxq %rax, %r10
  4880. adoxq %rcx, %r8
  4881. # A[4] x A[0]
  4882. mulxq 32(%rsi), %rax, %rcx
  4883. adcxq %rax, %r8
  4884. adoxq %rcx, %r9
  4885. movq %r10, 24(%rbp)
  4886. movq %r8, 32(%rbp)
  4887. movq %r11, %r10
  4888. movq %r11, %r8
  4889. # A[5] x A[0]
  4890. mulxq 40(%rsi), %rax, %rcx
  4891. adcxq %rax, %r9
  4892. adoxq %rcx, %r10
  4893. # A[6] x A[0]
  4894. mulxq 48(%rsi), %rax, %rcx
  4895. adcxq %rax, %r10
  4896. adoxq %rcx, %r8
  4897. movq %r9, 40(%rbp)
  4898. movq %r10, 48(%rbp)
  4899. movq %r11, %r9
  4900. movq %r11, %r10
  4901. # A[7] x A[0]
  4902. mulxq 56(%rsi), %rax, %rcx
  4903. adcxq %rax, %r8
  4904. adoxq %rcx, %r9
  4905. # A[8] x A[0]
  4906. mulxq 64(%rsi), %rax, %rcx
  4907. adcxq %rax, %r9
  4908. adoxq %rcx, %r10
  4909. movq %r8, 56(%rbp)
  4910. movq %r9, 64(%rbp)
  4911. movq %r11, %r8
  4912. movq %r11, %r9
  4913. # A[9] x A[0]
  4914. mulxq 72(%rsi), %rax, %rcx
  4915. adcxq %rax, %r10
  4916. adoxq %rcx, %r8
  4917. # A[10] x A[0]
  4918. mulxq 80(%rsi), %rax, %rcx
  4919. adcxq %rax, %r8
  4920. adoxq %rcx, %r9
  4921. movq %r10, 72(%rbp)
  4922. movq %r8, 80(%rbp)
  4923. movq %r11, %r10
  4924. movq %r11, %r8
  4925. # A[11] x A[0]
  4926. mulxq 88(%rsi), %rax, %rcx
  4927. adcxq %rax, %r9
  4928. adoxq %rcx, %r10
  4929. # A[12] x A[0]
  4930. mulxq 96(%rsi), %rax, %rcx
  4931. adcxq %rax, %r10
  4932. adoxq %rcx, %r8
  4933. movq %r9, 88(%rbp)
  4934. movq %r10, %r13
  4935. movq %r11, %r9
  4936. movq %r11, %r10
  4937. # A[13] x A[0]
  4938. mulxq 104(%rsi), %rax, %rcx
  4939. adcxq %rax, %r8
  4940. adoxq %rcx, %r9
  4941. # A[14] x A[0]
  4942. mulxq 112(%rsi), %rax, %rcx
  4943. adcxq %rax, %r9
  4944. adoxq %rcx, %r10
  4945. movq %r8, %r14
  4946. movq %r9, %r15
  4947. movq %r11, %r8
  4948. # A[15] x A[0]
  4949. mulxq 120(%rsi), %rax, %rcx
  4950. adcxq %rax, %r10
  4951. adoxq %rcx, %r8
  4952. movq %r10, %rbx
  4953. # Carry
  4954. adcxq %r11, %r8
  4955. movq %r11, %r12
  4956. adcxq %r11, %r12
  4957. adoxq %r11, %r12
  4958. movq %r8, 128(%rdi)
  4959. # Diagonal 2
  4960. movq 24(%rbp), %r8
  4961. movq 32(%rbp), %r9
  4962. movq 40(%rbp), %r10
  4963. # A[2] x A[1]
  4964. movq 8(%rsi), %rdx
  4965. mulxq 16(%rsi), %rax, %rcx
  4966. adcxq %rax, %r8
  4967. adoxq %rcx, %r9
  4968. # A[3] x A[1]
  4969. mulxq 24(%rsi), %rax, %rcx
  4970. adcxq %rax, %r9
  4971. adoxq %rcx, %r10
  4972. movq %r8, 24(%rbp)
  4973. movq %r9, 32(%rbp)
  4974. movq 48(%rbp), %r8
  4975. movq 56(%rbp), %r9
  4976. # A[4] x A[1]
  4977. mulxq 32(%rsi), %rax, %rcx
  4978. adcxq %rax, %r10
  4979. adoxq %rcx, %r8
  4980. # A[5] x A[1]
  4981. mulxq 40(%rsi), %rax, %rcx
  4982. adcxq %rax, %r8
  4983. adoxq %rcx, %r9
  4984. movq %r10, 40(%rbp)
  4985. movq %r8, 48(%rbp)
  4986. movq 64(%rbp), %r10
  4987. movq 72(%rbp), %r8
  4988. # A[6] x A[1]
  4989. mulxq 48(%rsi), %rax, %rcx
  4990. adcxq %rax, %r9
  4991. adoxq %rcx, %r10
  4992. # A[7] x A[1]
  4993. mulxq 56(%rsi), %rax, %rcx
  4994. adcxq %rax, %r10
  4995. adoxq %rcx, %r8
  4996. movq %r9, 56(%rbp)
  4997. movq %r10, 64(%rbp)
  4998. movq 80(%rbp), %r9
  4999. movq 88(%rbp), %r10
  5000. # A[8] x A[1]
  5001. mulxq 64(%rsi), %rax, %rcx
  5002. adcxq %rax, %r8
  5003. adoxq %rcx, %r9
  5004. # A[9] x A[1]
  5005. mulxq 72(%rsi), %rax, %rcx
  5006. adcxq %rax, %r9
  5007. adoxq %rcx, %r10
  5008. movq %r8, 72(%rbp)
  5009. movq %r9, 80(%rbp)
  5010. # No load %r13 - %r8
  5011. # No load %r14 - %r9
  5012. # A[10] x A[1]
  5013. mulxq 80(%rsi), %rax, %rcx
  5014. adcxq %rax, %r10
  5015. adoxq %rcx, %r13
  5016. # A[11] x A[1]
  5017. mulxq 88(%rsi), %rax, %rcx
  5018. adcxq %rax, %r13
  5019. adoxq %rcx, %r14
  5020. movq %r10, 88(%rbp)
  5021. # No store %r13
  5022. # No load %r15 - %r10
  5023. # No load %rbx - %r8
  5024. # A[12] x A[1]
  5025. mulxq 96(%rsi), %rax, %rcx
  5026. adcxq %rax, %r14
  5027. adoxq %rcx, %r15
  5028. # A[13] x A[1]
  5029. mulxq 104(%rsi), %rax, %rcx
  5030. adcxq %rax, %r15
  5031. adoxq %rcx, %rbx
  5032. # No store %r14
  5033. # No store %r15
  5034. movq 128(%rdi), %r9
  5035. movq %r11, %r10
  5036. # A[14] x A[1]
  5037. mulxq 112(%rsi), %rax, %rcx
  5038. adcxq %rax, %rbx
  5039. adoxq %rcx, %r9
  5040. # A[15] x A[1]
  5041. mulxq 120(%rsi), %rax, %rcx
  5042. adcxq %rax, %r9
  5043. adoxq %rcx, %r10
  5044. # No store %rbx
  5045. movq %r9, 128(%rdi)
  5046. movq %r11, %r8
  5047. # A[15] x A[2]
  5048. movq 16(%rsi), %rdx
  5049. mulxq 120(%rsi), %rax, %rcx
  5050. adcxq %rax, %r10
  5051. adoxq %rcx, %r8
  5052. movq %r10, 136(%rdi)
  5053. # Carry
  5054. adcxq %r12, %r8
  5055. movq %r11, %r12
  5056. adcxq %r11, %r12
  5057. adoxq %r11, %r12
  5058. movq %r8, 144(%rdi)
  5059. # Diagonal 3
  5060. movq 40(%rbp), %r8
  5061. movq 48(%rbp), %r9
  5062. movq 56(%rbp), %r10
  5063. # A[3] x A[2]
  5064. mulxq 24(%rsi), %rax, %rcx
  5065. adcxq %rax, %r8
  5066. adoxq %rcx, %r9
  5067. # A[4] x A[2]
  5068. mulxq 32(%rsi), %rax, %rcx
  5069. adcxq %rax, %r9
  5070. adoxq %rcx, %r10
  5071. movq %r8, 40(%rbp)
  5072. movq %r9, 48(%rbp)
  5073. movq 64(%rbp), %r8
  5074. movq 72(%rbp), %r9
  5075. # A[5] x A[2]
  5076. mulxq 40(%rsi), %rax, %rcx
  5077. adcxq %rax, %r10
  5078. adoxq %rcx, %r8
  5079. # A[6] x A[2]
  5080. mulxq 48(%rsi), %rax, %rcx
  5081. adcxq %rax, %r8
  5082. adoxq %rcx, %r9
  5083. movq %r10, 56(%rbp)
  5084. movq %r8, 64(%rbp)
  5085. movq 80(%rbp), %r10
  5086. movq 88(%rbp), %r8
  5087. # A[7] x A[2]
  5088. mulxq 56(%rsi), %rax, %rcx
  5089. adcxq %rax, %r9
  5090. adoxq %rcx, %r10
  5091. # A[8] x A[2]
  5092. mulxq 64(%rsi), %rax, %rcx
  5093. adcxq %rax, %r10
  5094. adoxq %rcx, %r8
  5095. movq %r9, 72(%rbp)
  5096. movq %r10, 80(%rbp)
  5097. # No load %r13 - %r9
  5098. # No load %r14 - %r10
  5099. # A[9] x A[2]
  5100. mulxq 72(%rsi), %rax, %rcx
  5101. adcxq %rax, %r8
  5102. adoxq %rcx, %r13
  5103. # A[10] x A[2]
  5104. mulxq 80(%rsi), %rax, %rcx
  5105. adcxq %rax, %r13
  5106. adoxq %rcx, %r14
  5107. movq %r8, 88(%rbp)
  5108. # No store %r13
  5109. # No load %r15 - %r8
  5110. # No load %rbx - %r9
  5111. # A[11] x A[2]
  5112. mulxq 88(%rsi), %rax, %rcx
  5113. adcxq %rax, %r14
  5114. adoxq %rcx, %r15
  5115. # A[12] x A[2]
  5116. mulxq 96(%rsi), %rax, %rcx
  5117. adcxq %rax, %r15
  5118. adoxq %rcx, %rbx
  5119. # No store %r14
  5120. # No store %r15
  5121. movq 128(%rdi), %r10
  5122. movq 136(%rdi), %r8
  5123. # A[13] x A[2]
  5124. mulxq 104(%rsi), %rax, %rcx
  5125. adcxq %rax, %rbx
  5126. adoxq %rcx, %r10
  5127. # A[14] x A[2]
  5128. mulxq 112(%rsi), %rax, %rcx
  5129. adcxq %rax, %r10
  5130. adoxq %rcx, %r8
  5131. # No store %rbx
  5132. movq %r10, 128(%rdi)
  5133. movq 144(%rdi), %r9
  5134. movq %r11, %r10
  5135. # A[14] x A[3]
  5136. movq 112(%rsi), %rdx
  5137. mulxq 24(%rsi), %rax, %rcx
  5138. adcxq %rax, %r8
  5139. adoxq %rcx, %r9
  5140. # A[14] x A[4]
  5141. mulxq 32(%rsi), %rax, %rcx
  5142. adcxq %rax, %r9
  5143. adoxq %rcx, %r10
  5144. movq %r8, 136(%rdi)
  5145. movq %r9, 144(%rdi)
  5146. movq %r11, %r8
  5147. # A[14] x A[5]
  5148. mulxq 40(%rsi), %rax, %rcx
  5149. adcxq %rax, %r10
  5150. adoxq %rcx, %r8
  5151. movq %r10, 152(%rdi)
  5152. # Carry
  5153. adcxq %r12, %r8
  5154. movq %r11, %r12
  5155. adcxq %r11, %r12
  5156. adoxq %r11, %r12
  5157. movq %r8, 160(%rdi)
  5158. # Diagonal 4
  5159. movq 56(%rbp), %r8
  5160. movq 64(%rbp), %r9
  5161. movq 72(%rbp), %r10
  5162. # A[4] x A[3]
  5163. movq 24(%rsi), %rdx
  5164. mulxq 32(%rsi), %rax, %rcx
  5165. adcxq %rax, %r8
  5166. adoxq %rcx, %r9
  5167. # A[5] x A[3]
  5168. mulxq 40(%rsi), %rax, %rcx
  5169. adcxq %rax, %r9
  5170. adoxq %rcx, %r10
  5171. movq %r8, 56(%rbp)
  5172. movq %r9, 64(%rbp)
  5173. movq 80(%rbp), %r8
  5174. movq 88(%rbp), %r9
  5175. # A[6] x A[3]
  5176. mulxq 48(%rsi), %rax, %rcx
  5177. adcxq %rax, %r10
  5178. adoxq %rcx, %r8
  5179. # A[7] x A[3]
  5180. mulxq 56(%rsi), %rax, %rcx
  5181. adcxq %rax, %r8
  5182. adoxq %rcx, %r9
  5183. movq %r10, 72(%rbp)
  5184. movq %r8, 80(%rbp)
  5185. # No load %r13 - %r10
  5186. # No load %r14 - %r8
  5187. # A[8] x A[3]
  5188. mulxq 64(%rsi), %rax, %rcx
  5189. adcxq %rax, %r9
  5190. adoxq %rcx, %r13
  5191. # A[9] x A[3]
  5192. mulxq 72(%rsi), %rax, %rcx
  5193. adcxq %rax, %r13
  5194. adoxq %rcx, %r14
  5195. movq %r9, 88(%rbp)
  5196. # No store %r13
  5197. # No load %r15 - %r9
  5198. # No load %rbx - %r10
  5199. # A[10] x A[3]
  5200. mulxq 80(%rsi), %rax, %rcx
  5201. adcxq %rax, %r14
  5202. adoxq %rcx, %r15
  5203. # A[11] x A[3]
  5204. mulxq 88(%rsi), %rax, %rcx
  5205. adcxq %rax, %r15
  5206. adoxq %rcx, %rbx
  5207. # No store %r14
  5208. # No store %r15
  5209. movq 128(%rdi), %r8
  5210. movq 136(%rdi), %r9
  5211. # A[12] x A[3]
  5212. mulxq 96(%rsi), %rax, %rcx
  5213. adcxq %rax, %rbx
  5214. adoxq %rcx, %r8
  5215. # A[13] x A[3]
  5216. mulxq 104(%rsi), %rax, %rcx
  5217. adcxq %rax, %r8
  5218. adoxq %rcx, %r9
  5219. # No store %rbx
  5220. movq %r8, 128(%rdi)
  5221. movq 144(%rdi), %r10
  5222. movq 152(%rdi), %r8
  5223. # A[13] x A[4]
  5224. movq 104(%rsi), %rdx
  5225. mulxq 32(%rsi), %rax, %rcx
  5226. adcxq %rax, %r9
  5227. adoxq %rcx, %r10
  5228. # A[13] x A[5]
  5229. mulxq 40(%rsi), %rax, %rcx
  5230. adcxq %rax, %r10
  5231. adoxq %rcx, %r8
  5232. movq %r9, 136(%rdi)
  5233. movq %r10, 144(%rdi)
  5234. movq 160(%rdi), %r9
  5235. movq %r11, %r10
  5236. # A[13] x A[6]
  5237. mulxq 48(%rsi), %rax, %rcx
  5238. adcxq %rax, %r8
  5239. adoxq %rcx, %r9
  5240. # A[13] x A[7]
  5241. mulxq 56(%rsi), %rax, %rcx
  5242. adcxq %rax, %r9
  5243. adoxq %rcx, %r10
  5244. movq %r8, 152(%rdi)
  5245. movq %r9, 160(%rdi)
  5246. movq %r11, %r8
  5247. # A[13] x A[8]
  5248. mulxq 64(%rsi), %rax, %rcx
  5249. adcxq %rax, %r10
  5250. adoxq %rcx, %r8
  5251. movq %r10, 168(%rdi)
  5252. # Carry
  5253. adcxq %r12, %r8
  5254. movq %r11, %r12
  5255. adcxq %r11, %r12
  5256. adoxq %r11, %r12
  5257. movq %r8, 176(%rdi)
  5258. # Diagonal 5
  5259. movq 72(%rbp), %r8
  5260. movq 80(%rbp), %r9
  5261. movq 88(%rbp), %r10
  5262. # A[5] x A[4]
  5263. movq 32(%rsi), %rdx
  5264. mulxq 40(%rsi), %rax, %rcx
  5265. adcxq %rax, %r8
  5266. adoxq %rcx, %r9
  5267. # A[6] x A[4]
  5268. mulxq 48(%rsi), %rax, %rcx
  5269. adcxq %rax, %r9
  5270. adoxq %rcx, %r10
  5271. movq %r8, 72(%rbp)
  5272. movq %r9, 80(%rbp)
  5273. # No load %r13 - %r8
  5274. # No load %r14 - %r9
  5275. # A[7] x A[4]
  5276. mulxq 56(%rsi), %rax, %rcx
  5277. adcxq %rax, %r10
  5278. adoxq %rcx, %r13
  5279. # A[8] x A[4]
  5280. mulxq 64(%rsi), %rax, %rcx
  5281. adcxq %rax, %r13
  5282. adoxq %rcx, %r14
  5283. movq %r10, 88(%rbp)
  5284. # No store %r13
  5285. # No load %r15 - %r10
  5286. # No load %rbx - %r8
  5287. # A[9] x A[4]
  5288. mulxq 72(%rsi), %rax, %rcx
  5289. adcxq %rax, %r14
  5290. adoxq %rcx, %r15
  5291. # A[10] x A[4]
  5292. mulxq 80(%rsi), %rax, %rcx
  5293. adcxq %rax, %r15
  5294. adoxq %rcx, %rbx
  5295. # No store %r14
  5296. # No store %r15
  5297. movq 128(%rdi), %r9
  5298. movq 136(%rdi), %r10
  5299. # A[11] x A[4]
  5300. mulxq 88(%rsi), %rax, %rcx
  5301. adcxq %rax, %rbx
  5302. adoxq %rcx, %r9
  5303. # A[12] x A[4]
  5304. mulxq 96(%rsi), %rax, %rcx
  5305. adcxq %rax, %r9
  5306. adoxq %rcx, %r10
  5307. # No store %rbx
  5308. movq %r9, 128(%rdi)
  5309. movq 144(%rdi), %r8
  5310. movq 152(%rdi), %r9
  5311. # A[12] x A[5]
  5312. movq 96(%rsi), %rdx
  5313. mulxq 40(%rsi), %rax, %rcx
  5314. adcxq %rax, %r10
  5315. adoxq %rcx, %r8
  5316. # A[12] x A[6]
  5317. mulxq 48(%rsi), %rax, %rcx
  5318. adcxq %rax, %r8
  5319. adoxq %rcx, %r9
  5320. movq %r10, 136(%rdi)
  5321. movq %r8, 144(%rdi)
  5322. movq 160(%rdi), %r10
  5323. movq 168(%rdi), %r8
  5324. # A[12] x A[7]
  5325. mulxq 56(%rsi), %rax, %rcx
  5326. adcxq %rax, %r9
  5327. adoxq %rcx, %r10
  5328. # A[12] x A[8]
  5329. mulxq 64(%rsi), %rax, %rcx
  5330. adcxq %rax, %r10
  5331. adoxq %rcx, %r8
  5332. movq %r9, 152(%rdi)
  5333. movq %r10, 160(%rdi)
  5334. movq 176(%rdi), %r9
  5335. movq %r11, %r10
  5336. # A[12] x A[9]
  5337. mulxq 72(%rsi), %rax, %rcx
  5338. adcxq %rax, %r8
  5339. adoxq %rcx, %r9
  5340. # A[12] x A[10]
  5341. mulxq 80(%rsi), %rax, %rcx
  5342. adcxq %rax, %r9
  5343. adoxq %rcx, %r10
  5344. movq %r8, 168(%rdi)
  5345. movq %r9, 176(%rdi)
  5346. movq %r11, %r8
  5347. # A[12] x A[11]
  5348. mulxq 88(%rsi), %rax, %rcx
  5349. adcxq %rax, %r10
  5350. adoxq %rcx, %r8
  5351. movq %r10, 184(%rdi)
  5352. # Carry
  5353. adcxq %r12, %r8
  5354. movq %r11, %r12
  5355. adcxq %r11, %r12
  5356. adoxq %r11, %r12
  5357. movq %r8, 192(%rdi)
  5358. # Diagonal 6
  5359. movq 88(%rbp), %r8
  5360. # No load %r13 - %r9
  5361. # No load %r14 - %r10
  5362. # A[6] x A[5]
  5363. movq 40(%rsi), %rdx
  5364. mulxq 48(%rsi), %rax, %rcx
  5365. adcxq %rax, %r8
  5366. adoxq %rcx, %r13
  5367. # A[7] x A[5]
  5368. mulxq 56(%rsi), %rax, %rcx
  5369. adcxq %rax, %r13
  5370. adoxq %rcx, %r14
  5371. movq %r8, 88(%rbp)
  5372. # No store %r13
  5373. # No load %r15 - %r8
  5374. # No load %rbx - %r9
  5375. # A[8] x A[5]
  5376. mulxq 64(%rsi), %rax, %rcx
  5377. adcxq %rax, %r14
  5378. adoxq %rcx, %r15
  5379. # A[9] x A[5]
  5380. mulxq 72(%rsi), %rax, %rcx
  5381. adcxq %rax, %r15
  5382. adoxq %rcx, %rbx
  5383. # No store %r14
  5384. # No store %r15
  5385. movq 128(%rdi), %r10
  5386. movq 136(%rdi), %r8
  5387. # A[10] x A[5]
  5388. mulxq 80(%rsi), %rax, %rcx
  5389. adcxq %rax, %rbx
  5390. adoxq %rcx, %r10
  5391. # A[11] x A[5]
  5392. mulxq 88(%rsi), %rax, %rcx
  5393. adcxq %rax, %r10
  5394. adoxq %rcx, %r8
  5395. # No store %rbx
  5396. movq %r10, 128(%rdi)
  5397. movq 144(%rdi), %r9
  5398. movq 152(%rdi), %r10
  5399. # A[11] x A[6]
  5400. movq 88(%rsi), %rdx
  5401. mulxq 48(%rsi), %rax, %rcx
  5402. adcxq %rax, %r8
  5403. adoxq %rcx, %r9
  5404. # A[11] x A[7]
  5405. mulxq 56(%rsi), %rax, %rcx
  5406. adcxq %rax, %r9
  5407. adoxq %rcx, %r10
  5408. movq %r8, 136(%rdi)
  5409. movq %r9, 144(%rdi)
  5410. movq 160(%rdi), %r8
  5411. movq 168(%rdi), %r9
  5412. # A[11] x A[8]
  5413. mulxq 64(%rsi), %rax, %rcx
  5414. adcxq %rax, %r10
  5415. adoxq %rcx, %r8
  5416. # A[11] x A[9]
  5417. mulxq 72(%rsi), %rax, %rcx
  5418. adcxq %rax, %r8
  5419. adoxq %rcx, %r9
  5420. movq %r10, 152(%rdi)
  5421. movq %r8, 160(%rdi)
  5422. movq 176(%rdi), %r10
  5423. movq 184(%rdi), %r8
  5424. # A[11] x A[10]
  5425. mulxq 80(%rsi), %rax, %rcx
  5426. adcxq %rax, %r9
  5427. adoxq %rcx, %r10
  5428. # A[13] x A[9]
  5429. movq 104(%rsi), %rdx
  5430. mulxq 72(%rsi), %rax, %rcx
  5431. adcxq %rax, %r10
  5432. adoxq %rcx, %r8
  5433. movq %r9, 168(%rdi)
  5434. movq %r10, 176(%rdi)
  5435. movq 192(%rdi), %r9
  5436. movq %r11, %r10
  5437. # A[13] x A[10]
  5438. mulxq 80(%rsi), %rax, %rcx
  5439. adcxq %rax, %r8
  5440. adoxq %rcx, %r9
  5441. # A[13] x A[11]
  5442. mulxq 88(%rsi), %rax, %rcx
  5443. adcxq %rax, %r9
  5444. adoxq %rcx, %r10
  5445. movq %r8, 184(%rdi)
  5446. movq %r9, 192(%rdi)
  5447. movq %r11, %r8
  5448. # A[13] x A[12]
  5449. mulxq 96(%rsi), %rax, %rcx
  5450. adcxq %rax, %r10
  5451. adoxq %rcx, %r8
  5452. movq %r10, 200(%rdi)
  5453. # Carry
  5454. adcxq %r12, %r8
  5455. movq %r11, %r12
  5456. adcxq %r11, %r12
  5457. adoxq %r11, %r12
  5458. movq %r8, 208(%rdi)
  5459. # Diagonal 7
  5460. # No load %r14 - %r8
  5461. # No load %r15 - %r9
  5462. # No load %rbx - %r10
  5463. # A[7] x A[6]
  5464. movq 48(%rsi), %rdx
  5465. mulxq 56(%rsi), %rax, %rcx
  5466. adcxq %rax, %r14
  5467. adoxq %rcx, %r15
  5468. # A[8] x A[6]
  5469. mulxq 64(%rsi), %rax, %rcx
  5470. adcxq %rax, %r15
  5471. adoxq %rcx, %rbx
  5472. # No store %r14
  5473. # No store %r15
  5474. movq 128(%rdi), %r8
  5475. movq 136(%rdi), %r9
  5476. # A[9] x A[6]
  5477. mulxq 72(%rsi), %rax, %rcx
  5478. adcxq %rax, %rbx
  5479. adoxq %rcx, %r8
  5480. # A[10] x A[6]
  5481. mulxq 80(%rsi), %rax, %rcx
  5482. adcxq %rax, %r8
  5483. adoxq %rcx, %r9
  5484. # No store %rbx
  5485. movq %r8, 128(%rdi)
  5486. movq 144(%rdi), %r10
  5487. movq 152(%rdi), %r8
  5488. # A[10] x A[7]
  5489. movq 80(%rsi), %rdx
  5490. mulxq 56(%rsi), %rax, %rcx
  5491. adcxq %rax, %r9
  5492. adoxq %rcx, %r10
  5493. # A[10] x A[8]
  5494. mulxq 64(%rsi), %rax, %rcx
  5495. adcxq %rax, %r10
  5496. adoxq %rcx, %r8
  5497. movq %r9, 136(%rdi)
  5498. movq %r10, 144(%rdi)
  5499. movq 160(%rdi), %r9
  5500. movq 168(%rdi), %r10
  5501. # A[10] x A[9]
  5502. mulxq 72(%rsi), %rax, %rcx
  5503. adcxq %rax, %r8
  5504. adoxq %rcx, %r9
  5505. # A[14] x A[6]
  5506. movq 112(%rsi), %rdx
  5507. mulxq 48(%rsi), %rax, %rcx
  5508. adcxq %rax, %r9
  5509. adoxq %rcx, %r10
  5510. movq %r8, 152(%rdi)
  5511. movq %r9, 160(%rdi)
  5512. movq 176(%rdi), %r8
  5513. movq 184(%rdi), %r9
  5514. # A[14] x A[7]
  5515. mulxq 56(%rsi), %rax, %rcx
  5516. adcxq %rax, %r10
  5517. adoxq %rcx, %r8
  5518. # A[14] x A[8]
  5519. mulxq 64(%rsi), %rax, %rcx
  5520. adcxq %rax, %r8
  5521. adoxq %rcx, %r9
  5522. movq %r10, 168(%rdi)
  5523. movq %r8, 176(%rdi)
  5524. movq 192(%rdi), %r10
  5525. movq 200(%rdi), %r8
  5526. # A[14] x A[9]
  5527. mulxq 72(%rsi), %rax, %rcx
  5528. adcxq %rax, %r9
  5529. adoxq %rcx, %r10
  5530. # A[14] x A[10]
  5531. mulxq 80(%rsi), %rax, %rcx
  5532. adcxq %rax, %r10
  5533. adoxq %rcx, %r8
  5534. movq %r9, 184(%rdi)
  5535. movq %r10, 192(%rdi)
  5536. movq 208(%rdi), %r9
  5537. movq %r11, %r10
  5538. # A[14] x A[11]
  5539. mulxq 88(%rsi), %rax, %rcx
  5540. adcxq %rax, %r8
  5541. adoxq %rcx, %r9
  5542. # A[14] x A[12]
  5543. mulxq 96(%rsi), %rax, %rcx
  5544. adcxq %rax, %r9
  5545. adoxq %rcx, %r10
  5546. movq %r8, 200(%rdi)
  5547. movq %r9, 208(%rdi)
  5548. movq %r11, %r8
  5549. # A[14] x A[13]
  5550. mulxq 104(%rsi), %rax, %rcx
  5551. adcxq %rax, %r10
  5552. adoxq %rcx, %r8
  5553. movq %r10, 216(%rdi)
  5554. # Carry
  5555. adcxq %r12, %r8
  5556. movq %r11, %r12
  5557. adcxq %r11, %r12
  5558. adoxq %r11, %r12
  5559. movq %r8, 224(%rdi)
  5560. # Diagonal 8
  5561. # No load %rbx - %r8
  5562. movq 128(%rdi), %r9
  5563. movq 136(%rdi), %r10
  5564. # A[8] x A[7]
  5565. movq 56(%rsi), %rdx
  5566. mulxq 64(%rsi), %rax, %rcx
  5567. adcxq %rax, %rbx
  5568. adoxq %rcx, %r9
  5569. # A[9] x A[7]
  5570. mulxq 72(%rsi), %rax, %rcx
  5571. adcxq %rax, %r9
  5572. adoxq %rcx, %r10
  5573. # No store %rbx
  5574. movq %r9, 128(%rdi)
  5575. movq 144(%rdi), %r8
  5576. movq 152(%rdi), %r9
  5577. # A[9] x A[8]
  5578. movq 64(%rsi), %rdx
  5579. mulxq 72(%rsi), %rax, %rcx
  5580. adcxq %rax, %r10
  5581. adoxq %rcx, %r8
  5582. # A[15] x A[3]
  5583. movq 120(%rsi), %rdx
  5584. mulxq 24(%rsi), %rax, %rcx
  5585. adcxq %rax, %r8
  5586. adoxq %rcx, %r9
  5587. movq %r10, 136(%rdi)
  5588. movq %r8, 144(%rdi)
  5589. movq 160(%rdi), %r10
  5590. movq 168(%rdi), %r8
  5591. # A[15] x A[4]
  5592. mulxq 32(%rsi), %rax, %rcx
  5593. adcxq %rax, %r9
  5594. adoxq %rcx, %r10
  5595. # A[15] x A[5]
  5596. mulxq 40(%rsi), %rax, %rcx
  5597. adcxq %rax, %r10
  5598. adoxq %rcx, %r8
  5599. movq %r9, 152(%rdi)
  5600. movq %r10, 160(%rdi)
  5601. movq 176(%rdi), %r9
  5602. movq 184(%rdi), %r10
  5603. # A[15] x A[6]
  5604. mulxq 48(%rsi), %rax, %rcx
  5605. adcxq %rax, %r8
  5606. adoxq %rcx, %r9
  5607. # A[15] x A[7]
  5608. mulxq 56(%rsi), %rax, %rcx
  5609. adcxq %rax, %r9
  5610. adoxq %rcx, %r10
  5611. movq %r8, 168(%rdi)
  5612. movq %r9, 176(%rdi)
  5613. movq 192(%rdi), %r8
  5614. movq 200(%rdi), %r9
  5615. # A[15] x A[8]
  5616. mulxq 64(%rsi), %rax, %rcx
  5617. adcxq %rax, %r10
  5618. adoxq %rcx, %r8
  5619. # A[15] x A[9]
  5620. mulxq 72(%rsi), %rax, %rcx
  5621. adcxq %rax, %r8
  5622. adoxq %rcx, %r9
  5623. movq %r10, 184(%rdi)
  5624. movq %r8, 192(%rdi)
  5625. movq 208(%rdi), %r10
  5626. movq 216(%rdi), %r8
  5627. # A[15] x A[10]
  5628. mulxq 80(%rsi), %rax, %rcx
  5629. adcxq %rax, %r9
  5630. adoxq %rcx, %r10
  5631. # A[15] x A[11]
  5632. mulxq 88(%rsi), %rax, %rcx
  5633. adcxq %rax, %r10
  5634. adoxq %rcx, %r8
  5635. movq %r9, 200(%rdi)
  5636. movq %r10, 208(%rdi)
  5637. movq 224(%rdi), %r9
  5638. movq %r11, %r10
  5639. # A[15] x A[12]
  5640. mulxq 96(%rsi), %rax, %rcx
  5641. adcxq %rax, %r8
  5642. adoxq %rcx, %r9
  5643. # A[15] x A[13]
  5644. mulxq 104(%rsi), %rax, %rcx
  5645. adcxq %rax, %r9
  5646. adoxq %rcx, %r10
  5647. movq %r8, 216(%rdi)
  5648. movq %r9, 224(%rdi)
  5649. movq %r11, %r8
  5650. # A[15] x A[14]
  5651. mulxq 112(%rsi), %rax, %rcx
  5652. adcxq %rax, %r10
  5653. adoxq %rcx, %r8
  5654. movq %r10, 232(%rdi)
  5655. # Carry
  5656. adcxq %r12, %r8
  5657. movq %r11, %r12
  5658. adcxq %r11, %r12
  5659. adoxq %r11, %r12
  5660. movq %r8, 240(%rdi)
  5661. movq %r12, 248(%rdi)
  5662. # Double and Add in A[i] x A[i]
  5663. movq 8(%rbp), %r9
  5664. # A[0] x A[0]
  5665. movq (%rsi), %rdx
  5666. mulxq %rdx, %rax, %rcx
  5667. movq %rax, (%rbp)
  5668. adoxq %r9, %r9
  5669. adcxq %rcx, %r9
  5670. movq %r9, 8(%rbp)
  5671. movq 16(%rbp), %r8
  5672. movq 24(%rbp), %r9
  5673. # A[1] x A[1]
  5674. movq 8(%rsi), %rdx
  5675. mulxq %rdx, %rax, %rcx
  5676. adoxq %r8, %r8
  5677. adoxq %r9, %r9
  5678. adcxq %rax, %r8
  5679. adcxq %rcx, %r9
  5680. movq %r8, 16(%rbp)
  5681. movq %r9, 24(%rbp)
  5682. movq 32(%rbp), %r8
  5683. movq 40(%rbp), %r9
  5684. # A[2] x A[2]
  5685. movq 16(%rsi), %rdx
  5686. mulxq %rdx, %rax, %rcx
  5687. adoxq %r8, %r8
  5688. adoxq %r9, %r9
  5689. adcxq %rax, %r8
  5690. adcxq %rcx, %r9
  5691. movq %r8, 32(%rbp)
  5692. movq %r9, 40(%rbp)
  5693. movq 48(%rbp), %r8
  5694. movq 56(%rbp), %r9
  5695. # A[3] x A[3]
  5696. movq 24(%rsi), %rdx
  5697. mulxq %rdx, %rax, %rcx
  5698. adoxq %r8, %r8
  5699. adoxq %r9, %r9
  5700. adcxq %rax, %r8
  5701. adcxq %rcx, %r9
  5702. movq %r8, 48(%rbp)
  5703. movq %r9, 56(%rbp)
  5704. movq 64(%rbp), %r8
  5705. movq 72(%rbp), %r9
  5706. # A[4] x A[4]
  5707. movq 32(%rsi), %rdx
  5708. mulxq %rdx, %rax, %rcx
  5709. adoxq %r8, %r8
  5710. adoxq %r9, %r9
  5711. adcxq %rax, %r8
  5712. adcxq %rcx, %r9
  5713. movq %r8, 64(%rbp)
  5714. movq %r9, 72(%rbp)
  5715. movq 80(%rbp), %r8
  5716. movq 88(%rbp), %r9
  5717. # A[5] x A[5]
  5718. movq 40(%rsi), %rdx
  5719. mulxq %rdx, %rax, %rcx
  5720. adoxq %r8, %r8
  5721. adoxq %r9, %r9
  5722. adcxq %rax, %r8
  5723. adcxq %rcx, %r9
  5724. movq %r8, 80(%rbp)
  5725. movq %r9, 88(%rbp)
  5726. # A[6] x A[6]
  5727. movq 48(%rsi), %rdx
  5728. mulxq %rdx, %rax, %rcx
  5729. adoxq %r13, %r13
  5730. adoxq %r14, %r14
  5731. adcxq %rax, %r13
  5732. adcxq %rcx, %r14
  5733. # A[7] x A[7]
  5734. movq 56(%rsi), %rdx
  5735. mulxq %rdx, %rax, %rcx
  5736. adoxq %r15, %r15
  5737. adoxq %rbx, %rbx
  5738. adcxq %rax, %r15
  5739. adcxq %rcx, %rbx
  5740. movq 128(%rdi), %r8
  5741. movq 136(%rdi), %r9
  5742. # A[8] x A[8]
  5743. movq 64(%rsi), %rdx
  5744. mulxq %rdx, %rax, %rcx
  5745. adoxq %r8, %r8
  5746. adoxq %r9, %r9
  5747. adcxq %rax, %r8
  5748. adcxq %rcx, %r9
  5749. movq %r8, 128(%rdi)
  5750. movq %r9, 136(%rdi)
  5751. movq 144(%rdi), %r8
  5752. movq 152(%rdi), %r9
  5753. # A[9] x A[9]
  5754. movq 72(%rsi), %rdx
  5755. mulxq %rdx, %rax, %rcx
  5756. adoxq %r8, %r8
  5757. adoxq %r9, %r9
  5758. adcxq %rax, %r8
  5759. adcxq %rcx, %r9
  5760. movq %r8, 144(%rdi)
  5761. movq %r9, 152(%rdi)
  5762. movq 160(%rdi), %r8
  5763. movq 168(%rdi), %r9
  5764. # A[10] x A[10]
  5765. movq 80(%rsi), %rdx
  5766. mulxq %rdx, %rax, %rcx
  5767. adoxq %r8, %r8
  5768. adoxq %r9, %r9
  5769. adcxq %rax, %r8
  5770. adcxq %rcx, %r9
  5771. movq %r8, 160(%rdi)
  5772. movq %r9, 168(%rdi)
  5773. movq 176(%rdi), %r8
  5774. movq 184(%rdi), %r9
  5775. # A[11] x A[11]
  5776. movq 88(%rsi), %rdx
  5777. mulxq %rdx, %rax, %rcx
  5778. adoxq %r8, %r8
  5779. adoxq %r9, %r9
  5780. adcxq %rax, %r8
  5781. adcxq %rcx, %r9
  5782. movq %r8, 176(%rdi)
  5783. movq %r9, 184(%rdi)
  5784. movq 192(%rdi), %r8
  5785. movq 200(%rdi), %r9
  5786. # A[12] x A[12]
  5787. movq 96(%rsi), %rdx
  5788. mulxq %rdx, %rax, %rcx
  5789. adoxq %r8, %r8
  5790. adoxq %r9, %r9
  5791. adcxq %rax, %r8
  5792. adcxq %rcx, %r9
  5793. movq %r8, 192(%rdi)
  5794. movq %r9, 200(%rdi)
  5795. movq 208(%rdi), %r8
  5796. movq 216(%rdi), %r9
  5797. # A[13] x A[13]
  5798. movq 104(%rsi), %rdx
  5799. mulxq %rdx, %rax, %rcx
  5800. adoxq %r8, %r8
  5801. adoxq %r9, %r9
  5802. adcxq %rax, %r8
  5803. adcxq %rcx, %r9
  5804. movq %r8, 208(%rdi)
  5805. movq %r9, 216(%rdi)
  5806. movq 224(%rdi), %r8
  5807. movq 232(%rdi), %r9
  5808. # A[14] x A[14]
  5809. movq 112(%rsi), %rdx
  5810. mulxq %rdx, %rax, %rcx
  5811. adoxq %r8, %r8
  5812. adoxq %r9, %r9
  5813. adcxq %rax, %r8
  5814. adcxq %rcx, %r9
  5815. movq %r8, 224(%rdi)
  5816. movq %r9, 232(%rdi)
  5817. movq 240(%rdi), %r8
  5818. movq 248(%rdi), %r9
  5819. # A[15] x A[15]
  5820. movq 120(%rsi), %rdx
  5821. mulxq %rdx, %rax, %rcx
  5822. adoxq %r8, %r8
  5823. adoxq %r9, %r9
  5824. adcxq %rax, %r8
  5825. adcxq %rcx, %r9
  5826. movq %r8, 240(%rdi)
  5827. movq %r9, 248(%rdi)
  5828. movq %r13, 96(%rdi)
  5829. movq %r14, 104(%rdi)
  5830. movq %r15, 112(%rdi)
  5831. movq %rbx, 120(%rdi)
  5832. cmpq %rdi, %rsi
  5833. jne L_end_2048_sqr_avx2_16
  5834. vmovdqu (%rbp), %xmm0
  5835. vmovups %xmm0, (%rdi)
  5836. vmovdqu 16(%rbp), %xmm0
  5837. vmovups %xmm0, 16(%rdi)
  5838. vmovdqu 32(%rbp), %xmm0
  5839. vmovups %xmm0, 32(%rdi)
  5840. vmovdqu 48(%rbp), %xmm0
  5841. vmovups %xmm0, 48(%rdi)
  5842. vmovdqu 64(%rbp), %xmm0
  5843. vmovups %xmm0, 64(%rdi)
  5844. vmovdqu 80(%rbp), %xmm0
  5845. vmovups %xmm0, 80(%rdi)
  5846. L_end_2048_sqr_avx2_16:
  5847. addq $0x80, %rsp
  5848. popq %rbx
  5849. popq %r15
  5850. popq %r14
  5851. popq %r13
  5852. popq %r12
  5853. popq %rbp
  5854. repz retq
  5855. #ifndef __APPLE__
  5856. .size sp_2048_sqr_avx2_16,.-sp_2048_sqr_avx2_16
  5857. #endif /* __APPLE__ */
  5858. #endif /* HAVE_INTEL_AVX2 */
  /* Add b to a into r. (r = a + b)
   *
   * Adds two numbers of 16 words each (64 bits per word, least significant
   * word at offset 0) and reports the carry out of the top word.
   *
   * r  A single precision integer. Receives the 16 result words. (%rdi)
   * a  A single precision integer. First operand, 16 words, only read. (%rsi)
   * b  A single precision integer. Second operand, 16 words, only read. (%rdx)
   *
   * Returns the carry in %rax: 1 when the sum does not fit in 16 words,
   * otherwise 0.
   * Modifies %rcx, %r8 and the flags. Does not touch the stack.
   * r may be the same buffer as a or b: each result word is stored only
   * after the matching words of a and b have been read.
   */
#ifndef __APPLE__
.text
.globl  sp_2048_add_16
.type   sp_2048_add_16,@function
.align  16
sp_2048_add_16:
#else
.section        __TEXT,__text
.globl  _sp_2048_add_16
.p2align        4
_sp_2048_add_16:
#endif /* __APPLE__ */
        # Add
        /* Word 0 starts the carry chain. %rax is cleared before the addq
         * because xorq rewrites the flags. From here on only movq (flags
         * untouched) sits between the adcq steps, so CF is carried from
         * word to word. %rcx and %r8 alternate so that the next word of a
         * is loaded before the previous result is stored.
         */
        movq    (%rsi), %rcx
        xorq    %rax, %rax
        addq    (%rdx), %rcx
        movq    8(%rsi), %r8
        movq    %rcx, (%rdi)
        adcq    8(%rdx), %r8
        movq    16(%rsi), %rcx
        movq    %r8, 8(%rdi)
        adcq    16(%rdx), %rcx
        movq    24(%rsi), %r8
        movq    %rcx, 16(%rdi)
        adcq    24(%rdx), %r8
        movq    32(%rsi), %rcx
        movq    %r8, 24(%rdi)
        adcq    32(%rdx), %rcx
        movq    40(%rsi), %r8
        movq    %rcx, 32(%rdi)
        adcq    40(%rdx), %r8
        movq    48(%rsi), %rcx
        movq    %r8, 40(%rdi)
        adcq    48(%rdx), %rcx
        movq    56(%rsi), %r8
        movq    %rcx, 48(%rdi)
        adcq    56(%rdx), %r8
        movq    64(%rsi), %rcx
        movq    %r8, 56(%rdi)
        adcq    64(%rdx), %rcx
        movq    72(%rsi), %r8
        movq    %rcx, 64(%rdi)
        adcq    72(%rdx), %r8
        movq    80(%rsi), %rcx
        movq    %r8, 72(%rdi)
        adcq    80(%rdx), %rcx
        movq    88(%rsi), %r8
        movq    %rcx, 80(%rdi)
        adcq    88(%rdx), %r8
        movq    96(%rsi), %rcx
        movq    %r8, 88(%rdi)
        adcq    96(%rdx), %rcx
        movq    104(%rsi), %r8
        movq    %rcx, 96(%rdi)
        adcq    104(%rdx), %r8
        movq    112(%rsi), %rcx
        movq    %r8, 104(%rdi)
        adcq    112(%rdx), %rcx
        movq    120(%rsi), %r8
        movq    %rcx, 112(%rdi)
        adcq    120(%rdx), %r8
        movq    %r8, 120(%rdi)
        /* Fold the final carry flag into the zeroed %rax: result is 0 or 1. */
        adcq    $0x00, %rax
        /* rep-prefixed return, the form used by the other routines here. */
        repz retq
#ifndef __APPLE__
.size   sp_2048_add_16,.-sp_2048_add_16
#endif /* __APPLE__ */
  /* Sub b from a into a. (a -= b)
   *
   * Subtracts a number of 32 words from another of 32 words in place
   * (64 bits per word, least significant word at offset 0) and reports the
   * borrow out of the top word.
   *
   * a  A single precision integer and result. 32 words, overwritten. (%rdi)
   * b  A single precision integer. 32 words, only read. (%rsi)
   *
   * Returns the borrow in %rax: all ones (-1) when the subtraction borrows
   * out of the top word (b is larger than a), otherwise 0.
   * Modifies %rcx, %rdx and the flags. Does not touch the stack.
   */
#ifndef __APPLE__
.text
.globl  sp_2048_sub_in_place_32
.type   sp_2048_sub_in_place_32,@function
.align  16
sp_2048_sub_in_place_32:
#else
.section        __TEXT,__text
.globl  _sp_2048_sub_in_place_32
.p2align        4
_sp_2048_sub_in_place_32:
#endif /* __APPLE__ */
        /* Word 0 starts the borrow chain. %rax is cleared before the subq
         * because xorq rewrites the flags. Only movq (flags untouched) sits
         * between the sbbq steps, so CF is carried from word to word.
         * %rdx and %rcx alternate so that the next word of a is loaded
         * before the previous result is written back.
         */
        movq    (%rdi), %rdx
        xorq    %rax, %rax
        subq    (%rsi), %rdx
        movq    8(%rdi), %rcx
        movq    %rdx, (%rdi)
        sbbq    8(%rsi), %rcx
        movq    16(%rdi), %rdx
        movq    %rcx, 8(%rdi)
        sbbq    16(%rsi), %rdx
        movq    24(%rdi), %rcx
        movq    %rdx, 16(%rdi)
        sbbq    24(%rsi), %rcx
        movq    32(%rdi), %rdx
        movq    %rcx, 24(%rdi)
        sbbq    32(%rsi), %rdx
        movq    40(%rdi), %rcx
        movq    %rdx, 32(%rdi)
        sbbq    40(%rsi), %rcx
        movq    48(%rdi), %rdx
        movq    %rcx, 40(%rdi)
        sbbq    48(%rsi), %rdx
        movq    56(%rdi), %rcx
        movq    %rdx, 48(%rdi)
        sbbq    56(%rsi), %rcx
        movq    64(%rdi), %rdx
        movq    %rcx, 56(%rdi)
        sbbq    64(%rsi), %rdx
        movq    72(%rdi), %rcx
        movq    %rdx, 64(%rdi)
        sbbq    72(%rsi), %rcx
        movq    80(%rdi), %rdx
        movq    %rcx, 72(%rdi)
        sbbq    80(%rsi), %rdx
        movq    88(%rdi), %rcx
        movq    %rdx, 80(%rdi)
        sbbq    88(%rsi), %rcx
        movq    96(%rdi), %rdx
        movq    %rcx, 88(%rdi)
        sbbq    96(%rsi), %rdx
        movq    104(%rdi), %rcx
        movq    %rdx, 96(%rdi)
        sbbq    104(%rsi), %rcx
        movq    112(%rdi), %rdx
        movq    %rcx, 104(%rdi)
        sbbq    112(%rsi), %rdx
        movq    120(%rdi), %rcx
        movq    %rdx, 112(%rdi)
        sbbq    120(%rsi), %rcx
        movq    128(%rdi), %rdx
        movq    %rcx, 120(%rdi)
        sbbq    128(%rsi), %rdx
        movq    136(%rdi), %rcx
        movq    %rdx, 128(%rdi)
        sbbq    136(%rsi), %rcx
        movq    144(%rdi), %rdx
        movq    %rcx, 136(%rdi)
        sbbq    144(%rsi), %rdx
        movq    152(%rdi), %rcx
        movq    %rdx, 144(%rdi)
        sbbq    152(%rsi), %rcx
        movq    160(%rdi), %rdx
        movq    %rcx, 152(%rdi)
        sbbq    160(%rsi), %rdx
        movq    168(%rdi), %rcx
        movq    %rdx, 160(%rdi)
        sbbq    168(%rsi), %rcx
        movq    176(%rdi), %rdx
        movq    %rcx, 168(%rdi)
        sbbq    176(%rsi), %rdx
        movq    184(%rdi), %rcx
        movq    %rdx, 176(%rdi)
        sbbq    184(%rsi), %rcx
        movq    192(%rdi), %rdx
        movq    %rcx, 184(%rdi)
        sbbq    192(%rsi), %rdx
        movq    200(%rdi), %rcx
        movq    %rdx, 192(%rdi)
        sbbq    200(%rsi), %rcx
        movq    208(%rdi), %rdx
        movq    %rcx, 200(%rdi)
        sbbq    208(%rsi), %rdx
        movq    216(%rdi), %rcx
        movq    %rdx, 208(%rdi)
        sbbq    216(%rsi), %rcx
        movq    224(%rdi), %rdx
        movq    %rcx, 216(%rdi)
        sbbq    224(%rsi), %rdx
        movq    232(%rdi), %rcx
        movq    %rdx, 224(%rdi)
        sbbq    232(%rsi), %rcx
        movq    240(%rdi), %rdx
        movq    %rcx, 232(%rdi)
        sbbq    240(%rsi), %rdx
        movq    248(%rdi), %rcx
        movq    %rdx, 240(%rdi)
        sbbq    248(%rsi), %rcx
        movq    %rcx, 248(%rdi)
        /* 0 - 0 - CF: leaves 0 when there was no borrow, -1 when there was. */
        sbbq    $0x00, %rax
        /* rep-prefixed return, the form used by the other routines here. */
        repz retq
#ifndef __APPLE__
.size   sp_2048_sub_in_place_32,.-sp_2048_sub_in_place_32
#endif /* __APPLE__ */
  /* Add b to a into r. (r = a + b)
   *
   * Adds two numbers of 32 words each (64 bits per word, least significant
   * word at offset 0) and reports the carry out of the top word.
   *
   * r  A single precision integer. Receives the 32 result words. (%rdi)
   * a  A single precision integer. First operand, 32 words, only read. (%rsi)
   * b  A single precision integer. Second operand, 32 words, only read. (%rdx)
   *
   * Returns the carry in %rax: 1 when the sum does not fit in 32 words,
   * otherwise 0.
   * Modifies %rcx, %r8 and the flags. Does not touch the stack.
   * r may be the same buffer as a or b: each result word is stored only
   * after the matching words of a and b have been read.
   */
#ifndef __APPLE__
.text
.globl  sp_2048_add_32
.type   sp_2048_add_32,@function
.align  16
sp_2048_add_32:
#else
.section        __TEXT,__text
.globl  _sp_2048_add_32
.p2align        4
_sp_2048_add_32:
#endif /* __APPLE__ */
        # Add
        /* Word 0 starts the carry chain. %rax is cleared before the addq
         * because xorq rewrites the flags. From here on only movq (flags
         * untouched) sits between the adcq steps, so CF is carried from
         * word to word. %rcx and %r8 alternate so that the next word of a
         * is loaded before the previous result is stored.
         */
        movq    (%rsi), %rcx
        xorq    %rax, %rax
        addq    (%rdx), %rcx
        movq    8(%rsi), %r8
        movq    %rcx, (%rdi)
        adcq    8(%rdx), %r8
        movq    16(%rsi), %rcx
        movq    %r8, 8(%rdi)
        adcq    16(%rdx), %rcx
        movq    24(%rsi), %r8
        movq    %rcx, 16(%rdi)
        adcq    24(%rdx), %r8
        movq    32(%rsi), %rcx
        movq    %r8, 24(%rdi)
        adcq    32(%rdx), %rcx
        movq    40(%rsi), %r8
        movq    %rcx, 32(%rdi)
        adcq    40(%rdx), %r8
        movq    48(%rsi), %rcx
        movq    %r8, 40(%rdi)
        adcq    48(%rdx), %rcx
        movq    56(%rsi), %r8
        movq    %rcx, 48(%rdi)
        adcq    56(%rdx), %r8
        movq    64(%rsi), %rcx
        movq    %r8, 56(%rdi)
        adcq    64(%rdx), %rcx
        movq    72(%rsi), %r8
        movq    %rcx, 64(%rdi)
        adcq    72(%rdx), %r8
        movq    80(%rsi), %rcx
        movq    %r8, 72(%rdi)
        adcq    80(%rdx), %rcx
        movq    88(%rsi), %r8
        movq    %rcx, 80(%rdi)
        adcq    88(%rdx), %r8
        movq    96(%rsi), %rcx
        movq    %r8, 88(%rdi)
        adcq    96(%rdx), %rcx
        movq    104(%rsi), %r8
        movq    %rcx, 96(%rdi)
        adcq    104(%rdx), %r8
        movq    112(%rsi), %rcx
        movq    %r8, 104(%rdi)
        adcq    112(%rdx), %rcx
        movq    120(%rsi), %r8
        movq    %rcx, 112(%rdi)
        adcq    120(%rdx), %r8
        movq    128(%rsi), %rcx
        movq    %r8, 120(%rdi)
        adcq    128(%rdx), %rcx
        movq    136(%rsi), %r8
        movq    %rcx, 128(%rdi)
        adcq    136(%rdx), %r8
        movq    144(%rsi), %rcx
        movq    %r8, 136(%rdi)
        adcq    144(%rdx), %rcx
        movq    152(%rsi), %r8
        movq    %rcx, 144(%rdi)
        adcq    152(%rdx), %r8
        movq    160(%rsi), %rcx
        movq    %r8, 152(%rdi)
        adcq    160(%rdx), %rcx
        movq    168(%rsi), %r8
        movq    %rcx, 160(%rdi)
        adcq    168(%rdx), %r8
        movq    176(%rsi), %rcx
        movq    %r8, 168(%rdi)
        adcq    176(%rdx), %rcx
        movq    184(%rsi), %r8
        movq    %rcx, 176(%rdi)
        adcq    184(%rdx), %r8
        movq    192(%rsi), %rcx
        movq    %r8, 184(%rdi)
        adcq    192(%rdx), %rcx
        movq    200(%rsi), %r8
        movq    %rcx, 192(%rdi)
        adcq    200(%rdx), %r8
        movq    208(%rsi), %rcx
        movq    %r8, 200(%rdi)
        adcq    208(%rdx), %rcx
        movq    216(%rsi), %r8
        movq    %rcx, 208(%rdi)
        adcq    216(%rdx), %r8
        movq    224(%rsi), %rcx
        movq    %r8, 216(%rdi)
        adcq    224(%rdx), %rcx
        movq    232(%rsi), %r8
        movq    %rcx, 224(%rdi)
        adcq    232(%rdx), %r8
        movq    240(%rsi), %rcx
        movq    %r8, 232(%rdi)
        adcq    240(%rdx), %rcx
        movq    248(%rsi), %r8
        movq    %rcx, 240(%rdi)
        adcq    248(%rdx), %r8
        movq    %r8, 248(%rdi)
        /* Fold the final carry flag into the zeroed %rax: result is 0 or 1. */
        adcq    $0x00, %rax
        /* rep-prefixed return, the form used by the other routines here. */
        repz retq
#ifndef __APPLE__
.size   sp_2048_add_32,.-sp_2048_add_32
#endif /* __APPLE__ */
  6172. /* Multiply a and b into r. (r = a * b)
  6173. *
  6174. * r A single precision integer.
  6175. * a A single precision integer.
  6176. * b A single precision integer.
  6177. */
  6178. #ifndef __APPLE__
  6179. .text
  6180. .globl sp_2048_mul_32
  6181. .type sp_2048_mul_32,@function
  6182. .align 16
  6183. sp_2048_mul_32:
  6184. #else
  6185. .section __TEXT,__text
  6186. .globl _sp_2048_mul_32
  6187. .p2align 4
  6188. _sp_2048_mul_32:
  6189. #endif /* __APPLE__ */
  6190. pushq %r12
  6191. pushq %r13
  6192. pushq %r14
  6193. pushq %r15
  6194. subq $0x328, %rsp
  6195. movq %rdi, 768(%rsp)
  6196. movq %rsi, 776(%rsp)
  6197. movq %rdx, 784(%rsp)
  6198. leaq 512(%rsp), %r10
  6199. leaq 128(%rsi), %r12
  6200. # Add
  6201. movq (%rsi), %rax
  6202. xorq %r13, %r13
  6203. addq (%r12), %rax
  6204. movq 8(%rsi), %rcx
  6205. movq %rax, (%r10)
  6206. adcq 8(%r12), %rcx
  6207. movq 16(%rsi), %r8
  6208. movq %rcx, 8(%r10)
  6209. adcq 16(%r12), %r8
  6210. movq 24(%rsi), %rax
  6211. movq %r8, 16(%r10)
  6212. adcq 24(%r12), %rax
  6213. movq 32(%rsi), %rcx
  6214. movq %rax, 24(%r10)
  6215. adcq 32(%r12), %rcx
  6216. movq 40(%rsi), %r8
  6217. movq %rcx, 32(%r10)
  6218. adcq 40(%r12), %r8
  6219. movq 48(%rsi), %rax
  6220. movq %r8, 40(%r10)
  6221. adcq 48(%r12), %rax
  6222. movq 56(%rsi), %rcx
  6223. movq %rax, 48(%r10)
  6224. adcq 56(%r12), %rcx
  6225. movq 64(%rsi), %r8
  6226. movq %rcx, 56(%r10)
  6227. adcq 64(%r12), %r8
  6228. movq 72(%rsi), %rax
  6229. movq %r8, 64(%r10)
  6230. adcq 72(%r12), %rax
  6231. movq 80(%rsi), %rcx
  6232. movq %rax, 72(%r10)
  6233. adcq 80(%r12), %rcx
  6234. movq 88(%rsi), %r8
  6235. movq %rcx, 80(%r10)
  6236. adcq 88(%r12), %r8
  6237. movq 96(%rsi), %rax
  6238. movq %r8, 88(%r10)
  6239. adcq 96(%r12), %rax
  6240. movq 104(%rsi), %rcx
  6241. movq %rax, 96(%r10)
  6242. adcq 104(%r12), %rcx
  6243. movq 112(%rsi), %r8
  6244. movq %rcx, 104(%r10)
  6245. adcq 112(%r12), %r8
  6246. movq 120(%rsi), %rax
  6247. movq %r8, 112(%r10)
  6248. adcq 120(%r12), %rax
  6249. movq %rax, 120(%r10)
  6250. adcq $0x00, %r13
  6251. movq %r13, 792(%rsp)
  6252. leaq 640(%rsp), %r11
  6253. leaq 128(%rdx), %r12
  6254. # Add
  6255. movq (%rdx), %rax
  6256. xorq %r14, %r14
  6257. addq (%r12), %rax
  6258. movq 8(%rdx), %rcx
  6259. movq %rax, (%r11)
  6260. adcq 8(%r12), %rcx
  6261. movq 16(%rdx), %r8
  6262. movq %rcx, 8(%r11)
  6263. adcq 16(%r12), %r8
  6264. movq 24(%rdx), %rax
  6265. movq %r8, 16(%r11)
  6266. adcq 24(%r12), %rax
  6267. movq 32(%rdx), %rcx
  6268. movq %rax, 24(%r11)
  6269. adcq 32(%r12), %rcx
  6270. movq 40(%rdx), %r8
  6271. movq %rcx, 32(%r11)
  6272. adcq 40(%r12), %r8
  6273. movq 48(%rdx), %rax
  6274. movq %r8, 40(%r11)
  6275. adcq 48(%r12), %rax
  6276. movq 56(%rdx), %rcx
  6277. movq %rax, 48(%r11)
  6278. adcq 56(%r12), %rcx
  6279. movq 64(%rdx), %r8
  6280. movq %rcx, 56(%r11)
  6281. adcq 64(%r12), %r8
  6282. movq 72(%rdx), %rax
  6283. movq %r8, 64(%r11)
  6284. adcq 72(%r12), %rax
  6285. movq 80(%rdx), %rcx
  6286. movq %rax, 72(%r11)
  6287. adcq 80(%r12), %rcx
  6288. movq 88(%rdx), %r8
  6289. movq %rcx, 80(%r11)
  6290. adcq 88(%r12), %r8
  6291. movq 96(%rdx), %rax
  6292. movq %r8, 88(%r11)
  6293. adcq 96(%r12), %rax
  6294. movq 104(%rdx), %rcx
  6295. movq %rax, 96(%r11)
  6296. adcq 104(%r12), %rcx
  6297. movq 112(%rdx), %r8
  6298. movq %rcx, 104(%r11)
  6299. adcq 112(%r12), %r8
  6300. movq 120(%rdx), %rax
  6301. movq %r8, 112(%r11)
  6302. adcq 120(%r12), %rax
  6303. movq %rax, 120(%r11)
  6304. adcq $0x00, %r14
  6305. movq %r14, 800(%rsp)
  6306. movq %r11, %rdx
  6307. movq %r10, %rsi
  6308. movq %rsp, %rdi
  6309. #ifndef __APPLE__
  6310. callq sp_2048_mul_16@plt
  6311. #else
  6312. callq _sp_2048_mul_16
  6313. #endif /* __APPLE__ */
  6314. movq 784(%rsp), %rdx
  6315. movq 776(%rsp), %rsi
  6316. leaq 256(%rsp), %rdi
  6317. addq $0x80, %rdx
  6318. addq $0x80, %rsi
  6319. #ifndef __APPLE__
  6320. callq sp_2048_mul_16@plt
  6321. #else
  6322. callq _sp_2048_mul_16
  6323. #endif /* __APPLE__ */
  6324. movq 784(%rsp), %rdx
  6325. movq 776(%rsp), %rsi
  6326. movq 768(%rsp), %rdi
  6327. #ifndef __APPLE__
  6328. callq sp_2048_mul_16@plt
  6329. #else
  6330. callq _sp_2048_mul_16
  6331. #endif /* __APPLE__ */
  6332. movq 792(%rsp), %r13
  6333. movq 800(%rsp), %r14
  6334. movq 768(%rsp), %r15
  6335. movq %r13, %r9
  6336. leaq 512(%rsp), %r10
  6337. leaq 640(%rsp), %r11
  6338. andq %r14, %r9
  6339. negq %r13
  6340. negq %r14
  6341. addq $0x100, %r15
  6342. movq (%r10), %rax
  6343. movq (%r11), %rcx
  6344. andq %r14, %rax
  6345. andq %r13, %rcx
  6346. movq %rax, (%r10)
  6347. movq %rcx, (%r11)
  6348. movq 8(%r10), %rax
  6349. movq 8(%r11), %rcx
  6350. andq %r14, %rax
  6351. andq %r13, %rcx
  6352. movq %rax, 8(%r10)
  6353. movq %rcx, 8(%r11)
  6354. movq 16(%r10), %rax
  6355. movq 16(%r11), %rcx
  6356. andq %r14, %rax
  6357. andq %r13, %rcx
  6358. movq %rax, 16(%r10)
  6359. movq %rcx, 16(%r11)
  6360. movq 24(%r10), %rax
  6361. movq 24(%r11), %rcx
  6362. andq %r14, %rax
  6363. andq %r13, %rcx
  6364. movq %rax, 24(%r10)
  6365. movq %rcx, 24(%r11)
  6366. movq 32(%r10), %rax
  6367. movq 32(%r11), %rcx
  6368. andq %r14, %rax
  6369. andq %r13, %rcx
  6370. movq %rax, 32(%r10)
  6371. movq %rcx, 32(%r11)
  6372. movq 40(%r10), %rax
  6373. movq 40(%r11), %rcx
  6374. andq %r14, %rax
  6375. andq %r13, %rcx
  6376. movq %rax, 40(%r10)
  6377. movq %rcx, 40(%r11)
  6378. movq 48(%r10), %rax
  6379. movq 48(%r11), %rcx
  6380. andq %r14, %rax
  6381. andq %r13, %rcx
  6382. movq %rax, 48(%r10)
  6383. movq %rcx, 48(%r11)
  6384. movq 56(%r10), %rax
  6385. movq 56(%r11), %rcx
  6386. andq %r14, %rax
  6387. andq %r13, %rcx
  6388. movq %rax, 56(%r10)
  6389. movq %rcx, 56(%r11)
  6390. movq 64(%r10), %rax
  6391. movq 64(%r11), %rcx
  6392. andq %r14, %rax
  6393. andq %r13, %rcx
  6394. movq %rax, 64(%r10)
  6395. movq %rcx, 64(%r11)
  6396. movq 72(%r10), %rax
  6397. movq 72(%r11), %rcx
  6398. andq %r14, %rax
  6399. andq %r13, %rcx
  6400. movq %rax, 72(%r10)
  6401. movq %rcx, 72(%r11)
  6402. movq 80(%r10), %rax
  6403. movq 80(%r11), %rcx
  6404. andq %r14, %rax
  6405. andq %r13, %rcx
  6406. movq %rax, 80(%r10)
  6407. movq %rcx, 80(%r11)
  6408. movq 88(%r10), %rax
  6409. movq 88(%r11), %rcx
  6410. andq %r14, %rax
  6411. andq %r13, %rcx
  6412. movq %rax, 88(%r10)
  6413. movq %rcx, 88(%r11)
  6414. movq 96(%r10), %rax
  6415. movq 96(%r11), %rcx
  6416. andq %r14, %rax
  6417. andq %r13, %rcx
  6418. movq %rax, 96(%r10)
  6419. movq %rcx, 96(%r11)
  6420. movq 104(%r10), %rax
  6421. movq 104(%r11), %rcx
  6422. andq %r14, %rax
  6423. andq %r13, %rcx
  6424. movq %rax, 104(%r10)
  6425. movq %rcx, 104(%r11)
  6426. movq 112(%r10), %rax
  6427. movq 112(%r11), %rcx
  6428. andq %r14, %rax
  6429. andq %r13, %rcx
  6430. movq %rax, 112(%r10)
  6431. movq %rcx, 112(%r11)
  6432. movq 120(%r10), %rax
  6433. movq 120(%r11), %rcx
  6434. andq %r14, %rax
  6435. andq %r13, %rcx
  6436. movq %rax, 120(%r10)
  6437. movq %rcx, 120(%r11)
  6438. movq (%r10), %rax
  6439. addq (%r11), %rax
  6440. movq 8(%r10), %rcx
  6441. movq %rax, (%r15)
  6442. adcq 8(%r11), %rcx
  6443. movq 16(%r10), %r8
  6444. movq %rcx, 8(%r15)
  6445. adcq 16(%r11), %r8
  6446. movq 24(%r10), %rax
  6447. movq %r8, 16(%r15)
  6448. adcq 24(%r11), %rax
  6449. movq 32(%r10), %rcx
  6450. movq %rax, 24(%r15)
  6451. adcq 32(%r11), %rcx
  6452. movq 40(%r10), %r8
  6453. movq %rcx, 32(%r15)
  6454. adcq 40(%r11), %r8
  6455. movq 48(%r10), %rax
  6456. movq %r8, 40(%r15)
  6457. adcq 48(%r11), %rax
  6458. movq 56(%r10), %rcx
  6459. movq %rax, 48(%r15)
  6460. adcq 56(%r11), %rcx
  6461. movq 64(%r10), %r8
  6462. movq %rcx, 56(%r15)
  6463. adcq 64(%r11), %r8
  6464. movq 72(%r10), %rax
  6465. movq %r8, 64(%r15)
  6466. adcq 72(%r11), %rax
  6467. movq 80(%r10), %rcx
  6468. movq %rax, 72(%r15)
  6469. adcq 80(%r11), %rcx
  6470. movq 88(%r10), %r8
  6471. movq %rcx, 80(%r15)
  6472. adcq 88(%r11), %r8
  6473. movq 96(%r10), %rax
  6474. movq %r8, 88(%r15)
  6475. adcq 96(%r11), %rax
  6476. movq 104(%r10), %rcx
  6477. movq %rax, 96(%r15)
  6478. adcq 104(%r11), %rcx
  6479. movq 112(%r10), %r8
  6480. movq %rcx, 104(%r15)
  6481. adcq 112(%r11), %r8
  6482. movq 120(%r10), %rax
  6483. movq %r8, 112(%r15)
  6484. adcq 120(%r11), %rax
  6485. movq %rax, 120(%r15)
  6486. adcq $0x00, %r9
  6487. leaq 256(%rsp), %r11
  6488. movq %rsp, %r10
  6489. movq (%r10), %rax
  6490. subq (%r11), %rax
  6491. movq 8(%r10), %rcx
  6492. movq %rax, (%r10)
  6493. sbbq 8(%r11), %rcx
  6494. movq 16(%r10), %r8
  6495. movq %rcx, 8(%r10)
  6496. sbbq 16(%r11), %r8
  6497. movq 24(%r10), %rax
  6498. movq %r8, 16(%r10)
  6499. sbbq 24(%r11), %rax
  6500. movq 32(%r10), %rcx
  6501. movq %rax, 24(%r10)
  6502. sbbq 32(%r11), %rcx
  6503. movq 40(%r10), %r8
  6504. movq %rcx, 32(%r10)
  6505. sbbq 40(%r11), %r8
  6506. movq 48(%r10), %rax
  6507. movq %r8, 40(%r10)
  6508. sbbq 48(%r11), %rax
  6509. movq 56(%r10), %rcx
  6510. movq %rax, 48(%r10)
  6511. sbbq 56(%r11), %rcx
  6512. movq 64(%r10), %r8
  6513. movq %rcx, 56(%r10)
  6514. sbbq 64(%r11), %r8
  6515. movq 72(%r10), %rax
  6516. movq %r8, 64(%r10)
  6517. sbbq 72(%r11), %rax
  6518. movq 80(%r10), %rcx
  6519. movq %rax, 72(%r10)
  6520. sbbq 80(%r11), %rcx
  6521. movq 88(%r10), %r8
  6522. movq %rcx, 80(%r10)
  6523. sbbq 88(%r11), %r8
  6524. movq 96(%r10), %rax
  6525. movq %r8, 88(%r10)
  6526. sbbq 96(%r11), %rax
  6527. movq 104(%r10), %rcx
  6528. movq %rax, 96(%r10)
  6529. sbbq 104(%r11), %rcx
  6530. movq 112(%r10), %r8
  6531. movq %rcx, 104(%r10)
  6532. sbbq 112(%r11), %r8
  6533. movq 120(%r10), %rax
  6534. movq %r8, 112(%r10)
  6535. sbbq 120(%r11), %rax
  6536. movq 128(%r10), %rcx
  6537. movq %rax, 120(%r10)
  6538. sbbq 128(%r11), %rcx
  6539. movq 136(%r10), %r8
  6540. movq %rcx, 128(%r10)
  6541. sbbq 136(%r11), %r8
  6542. movq 144(%r10), %rax
  6543. movq %r8, 136(%r10)
  6544. sbbq 144(%r11), %rax
  6545. movq 152(%r10), %rcx
  6546. movq %rax, 144(%r10)
  6547. sbbq 152(%r11), %rcx
  6548. movq 160(%r10), %r8
  6549. movq %rcx, 152(%r10)
  6550. sbbq 160(%r11), %r8
  6551. movq 168(%r10), %rax
  6552. movq %r8, 160(%r10)
  6553. sbbq 168(%r11), %rax
  6554. movq 176(%r10), %rcx
  6555. movq %rax, 168(%r10)
  6556. sbbq 176(%r11), %rcx
  6557. movq 184(%r10), %r8
  6558. movq %rcx, 176(%r10)
  6559. sbbq 184(%r11), %r8
  6560. movq 192(%r10), %rax
  6561. movq %r8, 184(%r10)
  6562. sbbq 192(%r11), %rax
  6563. movq 200(%r10), %rcx
  6564. movq %rax, 192(%r10)
  6565. sbbq 200(%r11), %rcx
  6566. movq 208(%r10), %r8
  6567. movq %rcx, 200(%r10)
  6568. sbbq 208(%r11), %r8
  6569. movq 216(%r10), %rax
  6570. movq %r8, 208(%r10)
  6571. sbbq 216(%r11), %rax
  6572. movq 224(%r10), %rcx
  6573. movq %rax, 216(%r10)
  6574. sbbq 224(%r11), %rcx
  6575. movq 232(%r10), %r8
  6576. movq %rcx, 224(%r10)
  6577. sbbq 232(%r11), %r8
  6578. movq 240(%r10), %rax
  6579. movq %r8, 232(%r10)
  6580. sbbq 240(%r11), %rax
  6581. movq 248(%r10), %rcx
  6582. movq %rax, 240(%r10)
  6583. sbbq 248(%r11), %rcx
  6584. movq %rcx, 248(%r10)
  6585. sbbq $0x00, %r9
  6586. movq (%r10), %rax
  6587. subq (%rdi), %rax
  6588. movq 8(%r10), %rcx
  6589. movq %rax, (%r10)
  6590. sbbq 8(%rdi), %rcx
  6591. movq 16(%r10), %r8
  6592. movq %rcx, 8(%r10)
  6593. sbbq 16(%rdi), %r8
  6594. movq 24(%r10), %rax
  6595. movq %r8, 16(%r10)
  6596. sbbq 24(%rdi), %rax
  6597. movq 32(%r10), %rcx
  6598. movq %rax, 24(%r10)
  6599. sbbq 32(%rdi), %rcx
  6600. movq 40(%r10), %r8
  6601. movq %rcx, 32(%r10)
  6602. sbbq 40(%rdi), %r8
  6603. movq 48(%r10), %rax
  6604. movq %r8, 40(%r10)
  6605. sbbq 48(%rdi), %rax
  6606. movq 56(%r10), %rcx
  6607. movq %rax, 48(%r10)
  6608. sbbq 56(%rdi), %rcx
  6609. movq 64(%r10), %r8
  6610. movq %rcx, 56(%r10)
  6611. sbbq 64(%rdi), %r8
  6612. movq 72(%r10), %rax
  6613. movq %r8, 64(%r10)
  6614. sbbq 72(%rdi), %rax
  6615. movq 80(%r10), %rcx
  6616. movq %rax, 72(%r10)
  6617. sbbq 80(%rdi), %rcx
  6618. movq 88(%r10), %r8
  6619. movq %rcx, 80(%r10)
  6620. sbbq 88(%rdi), %r8
  6621. movq 96(%r10), %rax
  6622. movq %r8, 88(%r10)
  6623. sbbq 96(%rdi), %rax
  6624. movq 104(%r10), %rcx
  6625. movq %rax, 96(%r10)
  6626. sbbq 104(%rdi), %rcx
  6627. movq 112(%r10), %r8
  6628. movq %rcx, 104(%r10)
  6629. sbbq 112(%rdi), %r8
  6630. movq 120(%r10), %rax
  6631. movq %r8, 112(%r10)
  6632. sbbq 120(%rdi), %rax
  6633. movq 128(%r10), %rcx
  6634. movq %rax, 120(%r10)
  6635. sbbq 128(%rdi), %rcx
  6636. movq 136(%r10), %r8
  6637. movq %rcx, 128(%r10)
  6638. sbbq 136(%rdi), %r8
  6639. movq 144(%r10), %rax
  6640. movq %r8, 136(%r10)
  6641. sbbq 144(%rdi), %rax
  6642. movq 152(%r10), %rcx
  6643. movq %rax, 144(%r10)
  6644. sbbq 152(%rdi), %rcx
  6645. movq 160(%r10), %r8
  6646. movq %rcx, 152(%r10)
  6647. sbbq 160(%rdi), %r8
  6648. movq 168(%r10), %rax
  6649. movq %r8, 160(%r10)
  6650. sbbq 168(%rdi), %rax
  6651. movq 176(%r10), %rcx
  6652. movq %rax, 168(%r10)
  6653. sbbq 176(%rdi), %rcx
  6654. movq 184(%r10), %r8
  6655. movq %rcx, 176(%r10)
  6656. sbbq 184(%rdi), %r8
  6657. movq 192(%r10), %rax
  6658. movq %r8, 184(%r10)
  6659. sbbq 192(%rdi), %rax
  6660. movq 200(%r10), %rcx
  6661. movq %rax, 192(%r10)
  6662. sbbq 200(%rdi), %rcx
  6663. movq 208(%r10), %r8
  6664. movq %rcx, 200(%r10)
  6665. sbbq 208(%rdi), %r8
  6666. movq 216(%r10), %rax
  6667. movq %r8, 208(%r10)
  6668. sbbq 216(%rdi), %rax
  6669. movq 224(%r10), %rcx
  6670. movq %rax, 216(%r10)
  6671. sbbq 224(%rdi), %rcx
  6672. movq 232(%r10), %r8
  6673. movq %rcx, 224(%r10)
  6674. sbbq 232(%rdi), %r8
  6675. movq 240(%r10), %rax
  6676. movq %r8, 232(%r10)
  6677. sbbq 240(%rdi), %rax
  6678. movq 248(%r10), %rcx
  6679. movq %rax, 240(%r10)
  6680. sbbq 248(%rdi), %rcx
  6681. movq %rcx, 248(%r10)
  6682. sbbq $0x00, %r9
  6683. subq $0x80, %r15
  6684. # Add
  6685. movq (%r15), %rax
  6686. addq (%r10), %rax
  6687. movq 8(%r15), %rcx
  6688. movq %rax, (%r15)
  6689. adcq 8(%r10), %rcx
  6690. movq 16(%r15), %r8
  6691. movq %rcx, 8(%r15)
  6692. adcq 16(%r10), %r8
  6693. movq 24(%r15), %rax
  6694. movq %r8, 16(%r15)
  6695. adcq 24(%r10), %rax
  6696. movq 32(%r15), %rcx
  6697. movq %rax, 24(%r15)
  6698. adcq 32(%r10), %rcx
  6699. movq 40(%r15), %r8
  6700. movq %rcx, 32(%r15)
  6701. adcq 40(%r10), %r8
  6702. movq 48(%r15), %rax
  6703. movq %r8, 40(%r15)
  6704. adcq 48(%r10), %rax
  6705. movq 56(%r15), %rcx
  6706. movq %rax, 48(%r15)
  6707. adcq 56(%r10), %rcx
  6708. movq 64(%r15), %r8
  6709. movq %rcx, 56(%r15)
  6710. adcq 64(%r10), %r8
  6711. movq 72(%r15), %rax
  6712. movq %r8, 64(%r15)
  6713. adcq 72(%r10), %rax
  6714. movq 80(%r15), %rcx
  6715. movq %rax, 72(%r15)
  6716. adcq 80(%r10), %rcx
  6717. movq 88(%r15), %r8
  6718. movq %rcx, 80(%r15)
  6719. adcq 88(%r10), %r8
  6720. movq 96(%r15), %rax
  6721. movq %r8, 88(%r15)
  6722. adcq 96(%r10), %rax
  6723. movq 104(%r15), %rcx
  6724. movq %rax, 96(%r15)
  6725. adcq 104(%r10), %rcx
  6726. movq 112(%r15), %r8
  6727. movq %rcx, 104(%r15)
  6728. adcq 112(%r10), %r8
  6729. movq 120(%r15), %rax
  6730. movq %r8, 112(%r15)
  6731. adcq 120(%r10), %rax
  6732. movq 128(%r15), %rcx
  6733. movq %rax, 120(%r15)
  6734. adcq 128(%r10), %rcx
  6735. movq 136(%r15), %r8
  6736. movq %rcx, 128(%r15)
  6737. adcq 136(%r10), %r8
  6738. movq 144(%r15), %rax
  6739. movq %r8, 136(%r15)
  6740. adcq 144(%r10), %rax
  6741. movq 152(%r15), %rcx
  6742. movq %rax, 144(%r15)
  6743. adcq 152(%r10), %rcx
  6744. movq 160(%r15), %r8
  6745. movq %rcx, 152(%r15)
  6746. adcq 160(%r10), %r8
  6747. movq 168(%r15), %rax
  6748. movq %r8, 160(%r15)
  6749. adcq 168(%r10), %rax
  6750. movq 176(%r15), %rcx
  6751. movq %rax, 168(%r15)
  6752. adcq 176(%r10), %rcx
  6753. movq 184(%r15), %r8
  6754. movq %rcx, 176(%r15)
  6755. adcq 184(%r10), %r8
  6756. movq 192(%r15), %rax
  6757. movq %r8, 184(%r15)
  6758. adcq 192(%r10), %rax
  6759. movq 200(%r15), %rcx
  6760. movq %rax, 192(%r15)
  6761. adcq 200(%r10), %rcx
  6762. movq 208(%r15), %r8
  6763. movq %rcx, 200(%r15)
  6764. adcq 208(%r10), %r8
  6765. movq 216(%r15), %rax
  6766. movq %r8, 208(%r15)
  6767. adcq 216(%r10), %rax
  6768. movq 224(%r15), %rcx
  6769. movq %rax, 216(%r15)
  6770. adcq 224(%r10), %rcx
  6771. movq 232(%r15), %r8
  6772. movq %rcx, 224(%r15)
  6773. adcq 232(%r10), %r8
  6774. movq 240(%r15), %rax
  6775. movq %r8, 232(%r15)
  6776. adcq 240(%r10), %rax
  6777. movq 248(%r15), %rcx
  6778. movq %rax, 240(%r15)
  6779. adcq 248(%r10), %rcx
  6780. movq %rcx, 248(%r15)
  6781. adcq $0x00, %r9
  6782. movq %r9, 384(%rdi)
  6783. addq $0x80, %r15
  6784. # Add
  6785. movq (%r15), %rax
  6786. xorq %r9, %r9
  6787. addq (%r11), %rax
  6788. movq 8(%r15), %rcx
  6789. movq %rax, (%r15)
  6790. adcq 8(%r11), %rcx
  6791. movq 16(%r15), %r8
  6792. movq %rcx, 8(%r15)
  6793. adcq 16(%r11), %r8
  6794. movq 24(%r15), %rax
  6795. movq %r8, 16(%r15)
  6796. adcq 24(%r11), %rax
  6797. movq 32(%r15), %rcx
  6798. movq %rax, 24(%r15)
  6799. adcq 32(%r11), %rcx
  6800. movq 40(%r15), %r8
  6801. movq %rcx, 32(%r15)
  6802. adcq 40(%r11), %r8
  6803. movq 48(%r15), %rax
  6804. movq %r8, 40(%r15)
  6805. adcq 48(%r11), %rax
  6806. movq 56(%r15), %rcx
  6807. movq %rax, 48(%r15)
  6808. adcq 56(%r11), %rcx
  6809. movq 64(%r15), %r8
  6810. movq %rcx, 56(%r15)
  6811. adcq 64(%r11), %r8
  6812. movq 72(%r15), %rax
  6813. movq %r8, 64(%r15)
  6814. adcq 72(%r11), %rax
  6815. movq 80(%r15), %rcx
  6816. movq %rax, 72(%r15)
  6817. adcq 80(%r11), %rcx
  6818. movq 88(%r15), %r8
  6819. movq %rcx, 80(%r15)
  6820. adcq 88(%r11), %r8
  6821. movq 96(%r15), %rax
  6822. movq %r8, 88(%r15)
  6823. adcq 96(%r11), %rax
  6824. movq 104(%r15), %rcx
  6825. movq %rax, 96(%r15)
  6826. adcq 104(%r11), %rcx
  6827. movq 112(%r15), %r8
  6828. movq %rcx, 104(%r15)
  6829. adcq 112(%r11), %r8
  6830. movq 120(%r15), %rax
  6831. movq %r8, 112(%r15)
  6832. adcq 120(%r11), %rax
  6833. movq 128(%r15), %rcx
  6834. movq %rax, 120(%r15)
  6835. adcq 128(%r11), %rcx
  6836. movq %rcx, 128(%r15)
  6837. adcq $0x00, %r9
  6838. # Add to zero
  6839. movq 136(%r11), %rax
  6840. adcq $0x00, %rax
  6841. movq 144(%r11), %rcx
  6842. movq %rax, 136(%r15)
  6843. adcq $0x00, %rcx
  6844. movq 152(%r11), %r8
  6845. movq %rcx, 144(%r15)
  6846. adcq $0x00, %r8
  6847. movq 160(%r11), %rax
  6848. movq %r8, 152(%r15)
  6849. adcq $0x00, %rax
  6850. movq 168(%r11), %rcx
  6851. movq %rax, 160(%r15)
  6852. adcq $0x00, %rcx
  6853. movq 176(%r11), %r8
  6854. movq %rcx, 168(%r15)
  6855. adcq $0x00, %r8
  6856. movq 184(%r11), %rax
  6857. movq %r8, 176(%r15)
  6858. adcq $0x00, %rax
  6859. movq 192(%r11), %rcx
  6860. movq %rax, 184(%r15)
  6861. adcq $0x00, %rcx
  6862. movq 200(%r11), %r8
  6863. movq %rcx, 192(%r15)
  6864. adcq $0x00, %r8
  6865. movq 208(%r11), %rax
  6866. movq %r8, 200(%r15)
  6867. adcq $0x00, %rax
  6868. movq 216(%r11), %rcx
  6869. movq %rax, 208(%r15)
  6870. adcq $0x00, %rcx
  6871. movq 224(%r11), %r8
  6872. movq %rcx, 216(%r15)
  6873. adcq $0x00, %r8
  6874. movq 232(%r11), %rax
  6875. movq %r8, 224(%r15)
  6876. adcq $0x00, %rax
  6877. movq 240(%r11), %rcx
  6878. movq %rax, 232(%r15)
  6879. adcq $0x00, %rcx
  6880. movq 248(%r11), %r8
  6881. movq %rcx, 240(%r15)
  6882. adcq $0x00, %r8
  6883. movq %r8, 248(%r15)
  6884. addq $0x328, %rsp
  6885. popq %r15
  6886. popq %r14
  6887. popq %r13
  6888. popq %r12
  6889. repz retq
  6890. #ifndef __APPLE__
  6891. .size sp_2048_mul_32,.-sp_2048_mul_32
  6892. #endif /* __APPLE__ */
  /* Double a 16-word integer. (r = a + a)
   *
   * r  (%rdi) Destination: 16 64-bit words, least significant word first.
   * a  (%rsi) Source: 16 64-bit words, least significant word first.
   *
   * Returns the carry out of the most significant word (0 or 1) in %rax.
   * Scratch registers: %rcx, %rdx. Flags are modified.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_dbl_16
  .type sp_2048_dbl_16,@function
  .align 16
  sp_2048_dbl_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_dbl_16
  .p2align 4
  _sp_2048_dbl_16:
  #endif /* __APPLE__ */
          /* Zero the carry accumulator before the chain starts: xor rewrites
           * the flags, so it must not appear once the first add has run. */
          xorq %rax, %rax
          movq (%rsi), %rdx
          addq %rdx, %rdx
          /* Words 1..14, two per iteration. The next source word is always
           * loaded before the previous result is stored, and the two scratch
           * registers alternate. mov leaves CF untouched, so the carry runs
           * unbroken from word 0 to word 15. */
          .irp off, 0, 16, 32, 48, 64, 80, 96
          movq \off+8(%rsi), %rcx
          movq %rdx, \off(%rdi)
          adcq %rcx, %rcx
          movq \off+16(%rsi), %rdx
          movq %rcx, \off+8(%rdi)
          adcq %rdx, %rdx
          .endr
          /* Word 15 closes the chain. */
          movq 120(%rsi), %rcx
          movq %rdx, 112(%rdi)
          adcq %rcx, %rcx
          movq %rcx, 120(%rdi)
          /* Hand the final carry back to the caller. */
          adcq $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_2048_dbl_16,.-sp_2048_dbl_16
  #endif /* __APPLE__ */
  /* Square a and put result in r. (r = a * a)
   *
   * r  (%rdi) Result: 64 64-bit words, least significant word first.
   * a  (%rsi) Value to square: 32 64-bit words, least significant first.
   *
   * One Karatsuba step built on three calls to sp_2048_sqr_16 (defined
   * elsewhere; assumed to write the 32-word square of a 16-word input).
   * With a = a1 * 2^1024 + a0 and s + c * 2^1024 = a0 + a1 (c is 0 or 1):
   *
   *   z0 = a0^2            z2 = a1^2            z1 = s^2
   *   (a0 + a1)^2 = z1 + 2 * c * s * 2^1024 + c * 2^2048
   *   r = z0 + ((a0 + a1)^2 - z0 - z2) * 2^1024 + z2 * 2^2048
   *
   * Stack frame (0x298 bytes; together with the return address this keeps
   * %rsp 16-byte aligned at every call):
   *     0(%rsp)  z1, 32 words (later z1 - z2 - z0)
   *   256(%rsp)  z2, 32 words
   *   512(%rsp)  s, 16 words
   *   640(%rsp)  saved r
   *   648(%rsp)  saved a
   *   656(%rsp)  c
   *
   * Scratch registers: %rax (by callee at least), %rcx, %rdx, %rsi, %rdi,
   * %r8, %r9, %r10. No callee-saved register is touched here.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_sqr_32
  .type sp_2048_sqr_32,@function
  .align 16
  sp_2048_sqr_32:
  #else
  .section __TEXT,__text
  .globl _sp_2048_sqr_32
  .p2align 4
  _sp_2048_sqr_32:
  #endif /* __APPLE__ */
          subq $0x298, %rsp
          movq %rdi, 640(%rsp)
          movq %rsi, 648(%rsp)
          leaq 512(%rsp), %r8
          leaq 128(%rsi), %r9

          /* s = a0 + a1 (low 16 words), c = carry out. */
          xorq %rcx, %rcx
          movq (%rsi), %rdx
          addq (%r9), %rdx
          movq %rdx, (%r8)
          .irp off, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
          movq \off(%rsi), %rdx
          adcq \off(%r9), %rdx
          movq %rdx, \off(%r8)
          .endr
          adcq $0x00, %rcx
          movq %rcx, 656(%rsp)

          /* z1 = s^2 into 0(%rsp). */
          movq %r8, %rsi
          movq %rsp, %rdi
  #ifndef __APPLE__
          callq sp_2048_sqr_16@plt
  #else
          callq _sp_2048_sqr_16
  #endif /* __APPLE__ */

          /* z2 = a1^2 into 256(%rsp). */
          movq 648(%rsp), %rsi
          leaq 256(%rsp), %rdi
          addq $0x80, %rsi
  #ifndef __APPLE__
          callq sp_2048_sqr_16@plt
  #else
          callq _sp_2048_sqr_16
  #endif /* __APPLE__ */

          /* z0 = a0^2 straight into r[0..31]. */
          movq 648(%rsp), %rsi
          movq 640(%rsp), %rdi
  #ifndef __APPLE__
          callq sp_2048_sqr_16@plt
  #else
          callq _sp_2048_sqr_16
  #endif /* __APPLE__ */

          /* %rdi is caller-saved, so take r from the frame again instead of
           * trusting the callee to have left it alone. */
          movq 640(%rsp), %rdi
          movq 656(%rsp), %r10
          leaq 512(%rsp), %r8
          movq %r10, %rcx
          /* %r10 = 0 - c: all ones when c is 1, zero otherwise. */
          negq %r10

          /* r[32..47] = c ? s : 0, selected with the mask (no branch). */
          .irp off, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
          movq \off(%r8), %rdx
          andq %r10, %rdx
          movq %rdx, 256+\off(%rdi)
          .endr

          /* r[32..47] *= 2, giving 2 * c * s. %rcx = c + carry out, i.e. the
           * running value of word 48. */
          movq 256(%rdi), %rdx
          addq %rdx, %rdx
          movq %rdx, 256(%rdi)
          .irp off, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
          movq 256+\off(%rdi), %rdx
          adcq %rdx, %rdx
          movq %rdx, 256+\off(%rdi)
          .endr
          adcq $0x00, %rcx

          leaq 256(%rsp), %rsi
          movq %rsp, %r8

          /* z1 -= z2; the borrow comes out of word 48 (%rcx). */
          movq (%r8), %rdx
          subq (%rsi), %rdx
          movq %rdx, (%r8)
          .irp off, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
          movq \off(%r8), %rdx
          sbbq \off(%rsi), %rdx
          movq %rdx, \off(%r8)
          .endr
          .irp off, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248
          movq \off(%r8), %rdx
          sbbq \off(%rsi), %rdx
          movq %rdx, \off(%r8)
          .endr
          sbbq $0x00, %rcx

          /* z1 -= z0 (z0 currently lives in r[0..31]). */
          movq (%r8), %rdx
          subq (%rdi), %rdx
          movq %rdx, (%r8)
          .irp off, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
          movq \off(%r8), %rdx
          sbbq \off(%rdi), %rdx
          movq %rdx, \off(%r8)
          .endr
          .irp off, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248
          movq \off(%r8), %rdx
          sbbq \off(%rdi), %rdx
          movq %rdx, \off(%r8)
          .endr
          sbbq $0x00, %rcx

          /* r[16..47] += z1 - z2 - z0; word 48 collects the carry. */
          movq 128(%rdi), %rdx
          addq (%r8), %rdx
          movq %rdx, 128(%rdi)
          .irp off, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
          movq 128+\off(%rdi), %rdx
          adcq \off(%r8), %rdx
          movq %rdx, 128+\off(%rdi)
          .endr
          .irp off, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248
          movq 128+\off(%rdi), %rdx
          adcq \off(%r8), %rdx
          movq %rdx, 128+\off(%rdi)
          .endr
          adcq $0x00, %rcx
          movq %rcx, 384(%rdi)

          /* r[32..48] += z2[0..16]. */
          movq 256(%rdi), %rdx
          addq (%rsi), %rdx
          movq %rdx, 256(%rdi)
          .irp off, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128
          movq 256+\off(%rdi), %rdx
          adcq \off(%rsi), %rdx
          movq %rdx, 256+\off(%rdi)
          .endr

          /* r[49..63] = z2[17..31] + carry out of word 48.
           * Nothing that writes the flags may sit between the chain above and
           * this one: an intervening "adcq $0x00, <reg>" would swallow the
           * carry and leave the upper words short by one whenever word 48
           * overflows. */
          .irp off, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248
          movq \off(%rsi), %rdx
          adcq $0x00, %rdx
          movq %rdx, 256+\off(%rdi)
          .endr

          addq $0x298, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_2048_sqr_32,.-sp_2048_sqr_32
  #endif /* __APPLE__ */
  7561. /* Multiply a and b into r. (r = a * b)
  7562. *
  7563. * r A single precision integer.
  7564. * a A single precision integer.
  7565. * b A single precision integer.
  7566. */
  7567. #ifndef __APPLE__
  7568. .text
  7569. .globl sp_2048_mul_avx2_32
  7570. .type sp_2048_mul_avx2_32,@function
  7571. .align 16
  7572. sp_2048_mul_avx2_32:
  7573. #else
  7574. .section __TEXT,__text
  7575. .globl _sp_2048_mul_avx2_32
  7576. .p2align 4
  7577. _sp_2048_mul_avx2_32:
  7578. #endif /* __APPLE__ */
  7579. pushq %r12
  7580. pushq %r13
  7581. pushq %r14
  7582. pushq %r15
  7583. subq $0x328, %rsp
  7584. movq %rdi, 768(%rsp)
  7585. movq %rsi, 776(%rsp)
  7586. movq %rdx, 784(%rsp)
  7587. leaq 512(%rsp), %r10
  7588. leaq 128(%rsi), %r12
  7589. # Add
  7590. movq (%rsi), %rax
  7591. xorq %r13, %r13
  7592. addq (%r12), %rax
  7593. movq 8(%rsi), %rcx
  7594. movq %rax, (%r10)
  7595. adcq 8(%r12), %rcx
  7596. movq 16(%rsi), %r8
  7597. movq %rcx, 8(%r10)
  7598. adcq 16(%r12), %r8
  7599. movq 24(%rsi), %rax
  7600. movq %r8, 16(%r10)
  7601. adcq 24(%r12), %rax
  7602. movq 32(%rsi), %rcx
  7603. movq %rax, 24(%r10)
  7604. adcq 32(%r12), %rcx
  7605. movq 40(%rsi), %r8
  7606. movq %rcx, 32(%r10)
  7607. adcq 40(%r12), %r8
  7608. movq 48(%rsi), %rax
  7609. movq %r8, 40(%r10)
  7610. adcq 48(%r12), %rax
  7611. movq 56(%rsi), %rcx
  7612. movq %rax, 48(%r10)
  7613. adcq 56(%r12), %rcx
  7614. movq 64(%rsi), %r8
  7615. movq %rcx, 56(%r10)
  7616. adcq 64(%r12), %r8
  7617. movq 72(%rsi), %rax
  7618. movq %r8, 64(%r10)
  7619. adcq 72(%r12), %rax
  7620. movq 80(%rsi), %rcx
  7621. movq %rax, 72(%r10)
  7622. adcq 80(%r12), %rcx
  7623. movq 88(%rsi), %r8
  7624. movq %rcx, 80(%r10)
  7625. adcq 88(%r12), %r8
  7626. movq 96(%rsi), %rax
  7627. movq %r8, 88(%r10)
  7628. adcq 96(%r12), %rax
  7629. movq 104(%rsi), %rcx
  7630. movq %rax, 96(%r10)
  7631. adcq 104(%r12), %rcx
  7632. movq 112(%rsi), %r8
  7633. movq %rcx, 104(%r10)
  7634. adcq 112(%r12), %r8
  7635. movq 120(%rsi), %rax
  7636. movq %r8, 112(%r10)
  7637. adcq 120(%r12), %rax
  7638. movq %rax, 120(%r10)
  7639. adcq $0x00, %r13
  7640. movq %r13, 792(%rsp)
  7641. leaq 640(%rsp), %r11
  7642. leaq 128(%rdx), %r12
  7643. # Add
  7644. movq (%rdx), %rax
  7645. xorq %r14, %r14
  7646. addq (%r12), %rax
  7647. movq 8(%rdx), %rcx
  7648. movq %rax, (%r11)
  7649. adcq 8(%r12), %rcx
  7650. movq 16(%rdx), %r8
  7651. movq %rcx, 8(%r11)
  7652. adcq 16(%r12), %r8
  7653. movq 24(%rdx), %rax
  7654. movq %r8, 16(%r11)
  7655. adcq 24(%r12), %rax
  7656. movq 32(%rdx), %rcx
  7657. movq %rax, 24(%r11)
  7658. adcq 32(%r12), %rcx
  7659. movq 40(%rdx), %r8
  7660. movq %rcx, 32(%r11)
  7661. adcq 40(%r12), %r8
  7662. movq 48(%rdx), %rax
  7663. movq %r8, 40(%r11)
  7664. adcq 48(%r12), %rax
  7665. movq 56(%rdx), %rcx
  7666. movq %rax, 48(%r11)
  7667. adcq 56(%r12), %rcx
  7668. movq 64(%rdx), %r8
  7669. movq %rcx, 56(%r11)
  7670. adcq 64(%r12), %r8
  7671. movq 72(%rdx), %rax
  7672. movq %r8, 64(%r11)
  7673. adcq 72(%r12), %rax
  7674. movq 80(%rdx), %rcx
  7675. movq %rax, 72(%r11)
  7676. adcq 80(%r12), %rcx
  7677. movq 88(%rdx), %r8
  7678. movq %rcx, 80(%r11)
  7679. adcq 88(%r12), %r8
  7680. movq 96(%rdx), %rax
  7681. movq %r8, 88(%r11)
  7682. adcq 96(%r12), %rax
  7683. movq 104(%rdx), %rcx
  7684. movq %rax, 96(%r11)
  7685. adcq 104(%r12), %rcx
  7686. movq 112(%rdx), %r8
  7687. movq %rcx, 104(%r11)
  7688. adcq 112(%r12), %r8
  7689. movq 120(%rdx), %rax
  7690. movq %r8, 112(%r11)
  7691. adcq 120(%r12), %rax
  7692. movq %rax, 120(%r11)
  7693. adcq $0x00, %r14
  7694. movq %r14, 800(%rsp)
  7695. movq %r11, %rdx
  7696. movq %r10, %rsi
  7697. movq %rsp, %rdi
  7698. #ifndef __APPLE__
  7699. callq sp_2048_mul_avx2_16@plt
  7700. #else
  7701. callq _sp_2048_mul_avx2_16
  7702. #endif /* __APPLE__ */
  7703. movq 784(%rsp), %rdx
  7704. movq 776(%rsp), %rsi
  7705. leaq 256(%rsp), %rdi
  7706. addq $0x80, %rdx
  7707. addq $0x80, %rsi
  7708. #ifndef __APPLE__
  7709. callq sp_2048_mul_avx2_16@plt
  7710. #else
  7711. callq _sp_2048_mul_avx2_16
  7712. #endif /* __APPLE__ */
  7713. movq 784(%rsp), %rdx
  7714. movq 776(%rsp), %rsi
  7715. movq 768(%rsp), %rdi
  7716. #ifndef __APPLE__
  7717. callq sp_2048_mul_avx2_16@plt
  7718. #else
  7719. callq _sp_2048_mul_avx2_16
  7720. #endif /* __APPLE__ */
  7721. movq 792(%rsp), %r13
  7722. movq 800(%rsp), %r14
  7723. movq 768(%rsp), %r15
  7724. movq %r13, %r9
  7725. leaq 512(%rsp), %r10
  7726. leaq 640(%rsp), %r11
  7727. andq %r14, %r9
  7728. negq %r13
  7729. negq %r14
  7730. addq $0x100, %r15
  7731. movq (%r10), %rax
  7732. movq (%r11), %rcx
  7733. pextq %r14, %rax, %rax
  7734. pextq %r13, %rcx, %rcx
  7735. addq %rcx, %rax
  7736. movq 8(%r10), %rcx
  7737. movq 8(%r11), %r8
  7738. pextq %r14, %rcx, %rcx
  7739. pextq %r13, %r8, %r8
  7740. movq %rax, (%r15)
  7741. adcq %r8, %rcx
  7742. movq 16(%r10), %r8
  7743. movq 16(%r11), %rax
  7744. pextq %r14, %r8, %r8
  7745. pextq %r13, %rax, %rax
  7746. movq %rcx, 8(%r15)
  7747. adcq %rax, %r8
  7748. movq 24(%r10), %rax
  7749. movq 24(%r11), %rcx
  7750. pextq %r14, %rax, %rax
  7751. pextq %r13, %rcx, %rcx
  7752. movq %r8, 16(%r15)
  7753. adcq %rcx, %rax
  7754. movq 32(%r10), %rcx
  7755. movq 32(%r11), %r8
  7756. pextq %r14, %rcx, %rcx
  7757. pextq %r13, %r8, %r8
  7758. movq %rax, 24(%r15)
  7759. adcq %r8, %rcx
  7760. movq 40(%r10), %r8
  7761. movq 40(%r11), %rax
  7762. pextq %r14, %r8, %r8
  7763. pextq %r13, %rax, %rax
  7764. movq %rcx, 32(%r15)
  7765. adcq %rax, %r8
  7766. movq 48(%r10), %rax
  7767. movq 48(%r11), %rcx
  7768. pextq %r14, %rax, %rax
  7769. pextq %r13, %rcx, %rcx
  7770. movq %r8, 40(%r15)
  7771. adcq %rcx, %rax
  7772. movq 56(%r10), %rcx
  7773. movq 56(%r11), %r8
  7774. pextq %r14, %rcx, %rcx
  7775. pextq %r13, %r8, %r8
  7776. movq %rax, 48(%r15)
  7777. adcq %r8, %rcx
  7778. movq 64(%r10), %r8
  7779. movq 64(%r11), %rax
  7780. pextq %r14, %r8, %r8
  7781. pextq %r13, %rax, %rax
  7782. movq %rcx, 56(%r15)
  7783. adcq %rax, %r8
  7784. movq 72(%r10), %rax
  7785. movq 72(%r11), %rcx
  7786. pextq %r14, %rax, %rax
  7787. pextq %r13, %rcx, %rcx
  7788. movq %r8, 64(%r15)
  7789. adcq %rcx, %rax
  7790. movq 80(%r10), %rcx
  7791. movq 80(%r11), %r8
  7792. pextq %r14, %rcx, %rcx
  7793. pextq %r13, %r8, %r8
  7794. movq %rax, 72(%r15)
  7795. adcq %r8, %rcx
  7796. movq 88(%r10), %r8
  7797. movq 88(%r11), %rax
  7798. pextq %r14, %r8, %r8
  7799. pextq %r13, %rax, %rax
  7800. movq %rcx, 80(%r15)
  7801. adcq %rax, %r8
  7802. movq 96(%r10), %rax
  7803. movq 96(%r11), %rcx
  7804. pextq %r14, %rax, %rax
  7805. pextq %r13, %rcx, %rcx
  7806. movq %r8, 88(%r15)
  7807. adcq %rcx, %rax
  7808. movq 104(%r10), %rcx
  7809. movq 104(%r11), %r8
  7810. pextq %r14, %rcx, %rcx
  7811. pextq %r13, %r8, %r8
  7812. movq %rax, 96(%r15)
  7813. adcq %r8, %rcx
  7814. movq 112(%r10), %r8
  7815. movq 112(%r11), %rax
  7816. pextq %r14, %r8, %r8
  7817. pextq %r13, %rax, %rax
  7818. movq %rcx, 104(%r15)
  7819. adcq %rax, %r8
  7820. movq 120(%r10), %rax
  7821. movq 120(%r11), %rcx
  7822. pextq %r14, %rax, %rax
  7823. pextq %r13, %rcx, %rcx
  7824. movq %r8, 112(%r15)
  7825. adcq %rcx, %rax
  7826. movq %rax, 120(%r15)
  7827. adcq $0x00, %r9
  7828. leaq 256(%rsp), %r11
  7829. movq %rsp, %r10
  7830. movq (%r10), %rax
  7831. subq (%r11), %rax
  7832. movq 8(%r10), %rcx
  7833. movq %rax, (%r10)
  7834. sbbq 8(%r11), %rcx
  7835. movq 16(%r10), %r8
  7836. movq %rcx, 8(%r10)
  7837. sbbq 16(%r11), %r8
  7838. movq 24(%r10), %rax
  7839. movq %r8, 16(%r10)
  7840. sbbq 24(%r11), %rax
  7841. movq 32(%r10), %rcx
  7842. movq %rax, 24(%r10)
  7843. sbbq 32(%r11), %rcx
  7844. movq 40(%r10), %r8
  7845. movq %rcx, 32(%r10)
  7846. sbbq 40(%r11), %r8
  7847. movq 48(%r10), %rax
  7848. movq %r8, 40(%r10)
  7849. sbbq 48(%r11), %rax
  7850. movq 56(%r10), %rcx
  7851. movq %rax, 48(%r10)
  7852. sbbq 56(%r11), %rcx
  7853. movq 64(%r10), %r8
  7854. movq %rcx, 56(%r10)
  7855. sbbq 64(%r11), %r8
  7856. movq 72(%r10), %rax
  7857. movq %r8, 64(%r10)
  7858. sbbq 72(%r11), %rax
  7859. movq 80(%r10), %rcx
  7860. movq %rax, 72(%r10)
  7861. sbbq 80(%r11), %rcx
  7862. movq 88(%r10), %r8
  7863. movq %rcx, 80(%r10)
  7864. sbbq 88(%r11), %r8
  7865. movq 96(%r10), %rax
  7866. movq %r8, 88(%r10)
  7867. sbbq 96(%r11), %rax
  7868. movq 104(%r10), %rcx
  7869. movq %rax, 96(%r10)
  7870. sbbq 104(%r11), %rcx
  7871. movq 112(%r10), %r8
  7872. movq %rcx, 104(%r10)
  7873. sbbq 112(%r11), %r8
  7874. movq 120(%r10), %rax
  7875. movq %r8, 112(%r10)
  7876. sbbq 120(%r11), %rax
  7877. movq 128(%r10), %rcx
  7878. movq %rax, 120(%r10)
  7879. sbbq 128(%r11), %rcx
  7880. movq 136(%r10), %r8
  7881. movq %rcx, 128(%r10)
  7882. sbbq 136(%r11), %r8
  7883. movq 144(%r10), %rax
  7884. movq %r8, 136(%r10)
  7885. sbbq 144(%r11), %rax
  7886. movq 152(%r10), %rcx
  7887. movq %rax, 144(%r10)
  7888. sbbq 152(%r11), %rcx
  7889. movq 160(%r10), %r8
  7890. movq %rcx, 152(%r10)
  7891. sbbq 160(%r11), %r8
  7892. movq 168(%r10), %rax
  7893. movq %r8, 160(%r10)
  7894. sbbq 168(%r11), %rax
  7895. movq 176(%r10), %rcx
  7896. movq %rax, 168(%r10)
  7897. sbbq 176(%r11), %rcx
  7898. movq 184(%r10), %r8
  7899. movq %rcx, 176(%r10)
  7900. sbbq 184(%r11), %r8
  7901. movq 192(%r10), %rax
  7902. movq %r8, 184(%r10)
  7903. sbbq 192(%r11), %rax
  7904. movq 200(%r10), %rcx
  7905. movq %rax, 192(%r10)
  7906. sbbq 200(%r11), %rcx
  7907. movq 208(%r10), %r8
  7908. movq %rcx, 200(%r10)
  7909. sbbq 208(%r11), %r8
  7910. movq 216(%r10), %rax
  7911. movq %r8, 208(%r10)
  7912. sbbq 216(%r11), %rax
  7913. movq 224(%r10), %rcx
  7914. movq %rax, 216(%r10)
  7915. sbbq 224(%r11), %rcx
  7916. movq 232(%r10), %r8
  7917. movq %rcx, 224(%r10)
  7918. sbbq 232(%r11), %r8
  7919. movq 240(%r10), %rax
  7920. movq %r8, 232(%r10)
  7921. sbbq 240(%r11), %rax
  7922. movq 248(%r10), %rcx
  7923. movq %rax, 240(%r10)
  7924. sbbq 248(%r11), %rcx
  7925. movq %rcx, 248(%r10)
  7926. sbbq $0x00, %r9
  7927. movq (%r10), %rax
  7928. subq (%rdi), %rax
  7929. movq 8(%r10), %rcx
  7930. movq %rax, (%r10)
  7931. sbbq 8(%rdi), %rcx
  7932. movq 16(%r10), %r8
  7933. movq %rcx, 8(%r10)
  7934. sbbq 16(%rdi), %r8
  7935. movq 24(%r10), %rax
  7936. movq %r8, 16(%r10)
  7937. sbbq 24(%rdi), %rax
  7938. movq 32(%r10), %rcx
  7939. movq %rax, 24(%r10)
  7940. sbbq 32(%rdi), %rcx
  7941. movq 40(%r10), %r8
  7942. movq %rcx, 32(%r10)
  7943. sbbq 40(%rdi), %r8
  7944. movq 48(%r10), %rax
  7945. movq %r8, 40(%r10)
  7946. sbbq 48(%rdi), %rax
  7947. movq 56(%r10), %rcx
  7948. movq %rax, 48(%r10)
  7949. sbbq 56(%rdi), %rcx
  7950. movq 64(%r10), %r8
  7951. movq %rcx, 56(%r10)
  7952. sbbq 64(%rdi), %r8
  7953. movq 72(%r10), %rax
  7954. movq %r8, 64(%r10)
  7955. sbbq 72(%rdi), %rax
  7956. movq 80(%r10), %rcx
  7957. movq %rax, 72(%r10)
  7958. sbbq 80(%rdi), %rcx
  7959. movq 88(%r10), %r8
  7960. movq %rcx, 80(%r10)
  7961. sbbq 88(%rdi), %r8
  7962. movq 96(%r10), %rax
  7963. movq %r8, 88(%r10)
  7964. sbbq 96(%rdi), %rax
  7965. movq 104(%r10), %rcx
  7966. movq %rax, 96(%r10)
  7967. sbbq 104(%rdi), %rcx
  7968. movq 112(%r10), %r8
  7969. movq %rcx, 104(%r10)
  7970. sbbq 112(%rdi), %r8
  7971. movq 120(%r10), %rax
  7972. movq %r8, 112(%r10)
  7973. sbbq 120(%rdi), %rax
  7974. movq 128(%r10), %rcx
  7975. movq %rax, 120(%r10)
  7976. sbbq 128(%rdi), %rcx
  7977. movq 136(%r10), %r8
  7978. movq %rcx, 128(%r10)
  7979. sbbq 136(%rdi), %r8
  7980. movq 144(%r10), %rax
  7981. movq %r8, 136(%r10)
  7982. sbbq 144(%rdi), %rax
  7983. movq 152(%r10), %rcx
  7984. movq %rax, 144(%r10)
  7985. sbbq 152(%rdi), %rcx
  7986. movq 160(%r10), %r8
  7987. movq %rcx, 152(%r10)
  7988. sbbq 160(%rdi), %r8
  7989. movq 168(%r10), %rax
  7990. movq %r8, 160(%r10)
  7991. sbbq 168(%rdi), %rax
  7992. movq 176(%r10), %rcx
  7993. movq %rax, 168(%r10)
  7994. sbbq 176(%rdi), %rcx
  7995. movq 184(%r10), %r8
  7996. movq %rcx, 176(%r10)
  7997. sbbq 184(%rdi), %r8
  7998. movq 192(%r10), %rax
  7999. movq %r8, 184(%r10)
  8000. sbbq 192(%rdi), %rax
  8001. movq 200(%r10), %rcx
  8002. movq %rax, 192(%r10)
  8003. sbbq 200(%rdi), %rcx
  8004. movq 208(%r10), %r8
  8005. movq %rcx, 200(%r10)
  8006. sbbq 208(%rdi), %r8
  8007. movq 216(%r10), %rax
  8008. movq %r8, 208(%r10)
  8009. sbbq 216(%rdi), %rax
  8010. movq 224(%r10), %rcx
  8011. movq %rax, 216(%r10)
  8012. sbbq 224(%rdi), %rcx
  8013. movq 232(%r10), %r8
  8014. movq %rcx, 224(%r10)
  8015. sbbq 232(%rdi), %r8
  8016. movq 240(%r10), %rax
  8017. movq %r8, 232(%r10)
  8018. sbbq 240(%rdi), %rax
  8019. movq 248(%r10), %rcx
  8020. movq %rax, 240(%r10)
  8021. sbbq 248(%rdi), %rcx
  8022. movq %rcx, 248(%r10)
  8023. sbbq $0x00, %r9
  8024. subq $0x80, %r15
  8025. # Add
  8026. movq (%r15), %rax
  8027. addq (%r10), %rax
  8028. movq 8(%r15), %rcx
  8029. movq %rax, (%r15)
  8030. adcq 8(%r10), %rcx
  8031. movq 16(%r15), %r8
  8032. movq %rcx, 8(%r15)
  8033. adcq 16(%r10), %r8
  8034. movq 24(%r15), %rax
  8035. movq %r8, 16(%r15)
  8036. adcq 24(%r10), %rax
  8037. movq 32(%r15), %rcx
  8038. movq %rax, 24(%r15)
  8039. adcq 32(%r10), %rcx
  8040. movq 40(%r15), %r8
  8041. movq %rcx, 32(%r15)
  8042. adcq 40(%r10), %r8
  8043. movq 48(%r15), %rax
  8044. movq %r8, 40(%r15)
  8045. adcq 48(%r10), %rax
  8046. movq 56(%r15), %rcx
  8047. movq %rax, 48(%r15)
  8048. adcq 56(%r10), %rcx
  8049. movq 64(%r15), %r8
  8050. movq %rcx, 56(%r15)
  8051. adcq 64(%r10), %r8
  8052. movq 72(%r15), %rax
  8053. movq %r8, 64(%r15)
  8054. adcq 72(%r10), %rax
  8055. movq 80(%r15), %rcx
  8056. movq %rax, 72(%r15)
  8057. adcq 80(%r10), %rcx
  8058. movq 88(%r15), %r8
  8059. movq %rcx, 80(%r15)
  8060. adcq 88(%r10), %r8
  8061. movq 96(%r15), %rax
  8062. movq %r8, 88(%r15)
  8063. adcq 96(%r10), %rax
  8064. movq 104(%r15), %rcx
  8065. movq %rax, 96(%r15)
  8066. adcq 104(%r10), %rcx
  8067. movq 112(%r15), %r8
  8068. movq %rcx, 104(%r15)
  8069. adcq 112(%r10), %r8
  8070. movq 120(%r15), %rax
  8071. movq %r8, 112(%r15)
  8072. adcq 120(%r10), %rax
  8073. movq 128(%r15), %rcx
  8074. movq %rax, 120(%r15)
  8075. adcq 128(%r10), %rcx
  8076. movq 136(%r15), %r8
  8077. movq %rcx, 128(%r15)
  8078. adcq 136(%r10), %r8
  8079. movq 144(%r15), %rax
  8080. movq %r8, 136(%r15)
  8081. adcq 144(%r10), %rax
  8082. movq 152(%r15), %rcx
  8083. movq %rax, 144(%r15)
  8084. adcq 152(%r10), %rcx
  8085. movq 160(%r15), %r8
  8086. movq %rcx, 152(%r15)
  8087. adcq 160(%r10), %r8
  8088. movq 168(%r15), %rax
  8089. movq %r8, 160(%r15)
  8090. adcq 168(%r10), %rax
  8091. movq 176(%r15), %rcx
  8092. movq %rax, 168(%r15)
  8093. adcq 176(%r10), %rcx
  8094. movq 184(%r15), %r8
  8095. movq %rcx, 176(%r15)
  8096. adcq 184(%r10), %r8
  8097. movq 192(%r15), %rax
  8098. movq %r8, 184(%r15)
  8099. adcq 192(%r10), %rax
  8100. movq 200(%r15), %rcx
  8101. movq %rax, 192(%r15)
  8102. adcq 200(%r10), %rcx
  8103. movq 208(%r15), %r8
  8104. movq %rcx, 200(%r15)
  8105. adcq 208(%r10), %r8
  8106. movq 216(%r15), %rax
  8107. movq %r8, 208(%r15)
  8108. adcq 216(%r10), %rax
  8109. movq 224(%r15), %rcx
  8110. movq %rax, 216(%r15)
  8111. adcq 224(%r10), %rcx
  8112. movq 232(%r15), %r8
  8113. movq %rcx, 224(%r15)
  8114. adcq 232(%r10), %r8
  8115. movq 240(%r15), %rax
  8116. movq %r8, 232(%r15)
  8117. adcq 240(%r10), %rax
  8118. movq 248(%r15), %rcx
  8119. movq %rax, 240(%r15)
  8120. adcq 248(%r10), %rcx
  8121. movq %rcx, 248(%r15)
  8122. adcq $0x00, %r9
  8123. movq %r9, 384(%rdi)
  8124. addq $0x80, %r15
  8125. # Add
  8126. movq (%r15), %rax
  8127. xorq %r9, %r9
  8128. addq (%r11), %rax
  8129. movq 8(%r15), %rcx
  8130. movq %rax, (%r15)
  8131. adcq 8(%r11), %rcx
  8132. movq 16(%r15), %r8
  8133. movq %rcx, 8(%r15)
  8134. adcq 16(%r11), %r8
  8135. movq 24(%r15), %rax
  8136. movq %r8, 16(%r15)
  8137. adcq 24(%r11), %rax
  8138. movq 32(%r15), %rcx
  8139. movq %rax, 24(%r15)
  8140. adcq 32(%r11), %rcx
  8141. movq 40(%r15), %r8
  8142. movq %rcx, 32(%r15)
  8143. adcq 40(%r11), %r8
  8144. movq 48(%r15), %rax
  8145. movq %r8, 40(%r15)
  8146. adcq 48(%r11), %rax
  8147. movq 56(%r15), %rcx
  8148. movq %rax, 48(%r15)
  8149. adcq 56(%r11), %rcx
  8150. movq 64(%r15), %r8
  8151. movq %rcx, 56(%r15)
  8152. adcq 64(%r11), %r8
  8153. movq 72(%r15), %rax
  8154. movq %r8, 64(%r15)
  8155. adcq 72(%r11), %rax
  8156. movq 80(%r15), %rcx
  8157. movq %rax, 72(%r15)
  8158. adcq 80(%r11), %rcx
  8159. movq 88(%r15), %r8
  8160. movq %rcx, 80(%r15)
  8161. adcq 88(%r11), %r8
  8162. movq 96(%r15), %rax
  8163. movq %r8, 88(%r15)
  8164. adcq 96(%r11), %rax
  8165. movq 104(%r15), %rcx
  8166. movq %rax, 96(%r15)
  8167. adcq 104(%r11), %rcx
  8168. movq 112(%r15), %r8
  8169. movq %rcx, 104(%r15)
  8170. adcq 112(%r11), %r8
  8171. movq 120(%r15), %rax
  8172. movq %r8, 112(%r15)
  8173. adcq 120(%r11), %rax
  8174. movq 128(%r15), %rcx
  8175. movq %rax, 120(%r15)
  8176. adcq 128(%r11), %rcx
  8177. movq %rcx, 128(%r15)
  8178. adcq $0x00, %r9
  8179. # Add to zero
  8180. movq 136(%r11), %rax
  8181. adcq $0x00, %rax
  8182. movq 144(%r11), %rcx
  8183. movq %rax, 136(%r15)
  8184. adcq $0x00, %rcx
  8185. movq 152(%r11), %r8
  8186. movq %rcx, 144(%r15)
  8187. adcq $0x00, %r8
  8188. movq 160(%r11), %rax
  8189. movq %r8, 152(%r15)
  8190. adcq $0x00, %rax
  8191. movq 168(%r11), %rcx
  8192. movq %rax, 160(%r15)
  8193. adcq $0x00, %rcx
  8194. movq 176(%r11), %r8
  8195. movq %rcx, 168(%r15)
  8196. adcq $0x00, %r8
  8197. movq 184(%r11), %rax
  8198. movq %r8, 176(%r15)
  8199. adcq $0x00, %rax
  8200. movq 192(%r11), %rcx
  8201. movq %rax, 184(%r15)
  8202. adcq $0x00, %rcx
  8203. movq 200(%r11), %r8
  8204. movq %rcx, 192(%r15)
  8205. adcq $0x00, %r8
  8206. movq 208(%r11), %rax
  8207. movq %r8, 200(%r15)
  8208. adcq $0x00, %rax
  8209. movq 216(%r11), %rcx
  8210. movq %rax, 208(%r15)
  8211. adcq $0x00, %rcx
  8212. movq 224(%r11), %r8
  8213. movq %rcx, 216(%r15)
  8214. adcq $0x00, %r8
  8215. movq 232(%r11), %rax
  8216. movq %r8, 224(%r15)
  8217. adcq $0x00, %rax
  8218. movq 240(%r11), %rcx
  8219. movq %rax, 232(%r15)
  8220. adcq $0x00, %rcx
  8221. movq 248(%r11), %r8
  8222. movq %rcx, 240(%r15)
  8223. adcq $0x00, %r8
  8224. movq %r8, 248(%r15)
  8225. addq $0x328, %rsp
  8226. popq %r15
  8227. popq %r14
  8228. popq %r13
  8229. popq %r12
  8230. repz retq
  8231. #ifndef __APPLE__
  8232. .size sp_2048_mul_avx2_32,.-sp_2048_mul_avx2_32
  8233. #endif /* __APPLE__ */
  /* Square a and put result in r. (r = a * a)
   *
   * One Karatsuba level built on sp_2048_sqr_avx2_16. Writing
   * a = a0 + a1 * 2^1024 (a0 = words 0..15, a1 = words 16..31):
   *
   *   a^2 = a0^2 + ((a0 + a1)^2 - a0^2 - a1^2) * 2^1024 + a1^2 * 2^2048
   *
   * r A single precision integer. Words 0..63 (byte offsets 0..504) are
   *   written.
   * a A single precision integer. Words 0..31 (byte offsets 0..248) are
   *   read.
   *
   * Stack frame (0x298 bytes):
   *     0(%rsp) .. 255(%rsp)  square of the low 16 words of (a0 + a1),
   *                           reduced in place to the middle term
   *   256(%rsp) .. 511(%rsp)  a1^2
   *   512(%rsp) .. 639(%rsp)  low 16 words of a0 + a1
   *   640(%rsp)               saved r
   *   648(%rsp)               saved a
   *   656(%rsp)               carry out of a0 + a1 (0 or 1)
   *
   * Each call to sp_2048_sqr_avx2_16 is expected to produce a 32 word
   * result; all 32 words of every result are consumed below.
   *
   * Scratch registers used here: %rax, %rcx, %rdx, %rsi, %r8, %r9, %r10.
   * Uses pextq, so a BMI2 capable CPU is required.
   * The code is straight line (no branches); the carry of a0 + a1 is
   * applied through a mask instead of a conditional jump.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_sqr_avx2_32
  .type sp_2048_sqr_avx2_32,@function
  .align 16
  sp_2048_sqr_avx2_32:
  #else
  .section __TEXT,__text
  .globl _sp_2048_sqr_avx2_32
  .p2align 4
  _sp_2048_sqr_avx2_32:
  #endif /* __APPLE__ */
        subq    $0x298, %rsp
        movq    %rdi, 640(%rsp)
        movq    %rsi, 648(%rsp)
        leaq    512(%rsp), %r8
        leaq    128(%rsi), %r9
        /* 512(%rsp) = a0 + a1 (16 words); %rcx collects the carry out. */
        # Add
        movq    (%rsi), %rdx
        xorq    %rcx, %rcx
        addq    (%r9), %rdx
        movq    8(%rsi), %rax
        movq    %rdx, (%r8)
        adcq    8(%r9), %rax
        movq    16(%rsi), %rdx
        movq    %rax, 8(%r8)
        adcq    16(%r9), %rdx
        movq    24(%rsi), %rax
        movq    %rdx, 16(%r8)
        adcq    24(%r9), %rax
        movq    32(%rsi), %rdx
        movq    %rax, 24(%r8)
        adcq    32(%r9), %rdx
        movq    40(%rsi), %rax
        movq    %rdx, 32(%r8)
        adcq    40(%r9), %rax
        movq    48(%rsi), %rdx
        movq    %rax, 40(%r8)
        adcq    48(%r9), %rdx
        movq    56(%rsi), %rax
        movq    %rdx, 48(%r8)
        adcq    56(%r9), %rax
        movq    64(%rsi), %rdx
        movq    %rax, 56(%r8)
        adcq    64(%r9), %rdx
        movq    72(%rsi), %rax
        movq    %rdx, 64(%r8)
        adcq    72(%r9), %rax
        movq    80(%rsi), %rdx
        movq    %rax, 72(%r8)
        adcq    80(%r9), %rdx
        movq    88(%rsi), %rax
        movq    %rdx, 80(%r8)
        adcq    88(%r9), %rax
        movq    96(%rsi), %rdx
        movq    %rax, 88(%r8)
        adcq    96(%r9), %rdx
        movq    104(%rsi), %rax
        movq    %rdx, 96(%r8)
        adcq    104(%r9), %rax
        movq    112(%rsi), %rdx
        movq    %rax, 104(%r8)
        adcq    112(%r9), %rdx
        movq    120(%rsi), %rax
        movq    %rdx, 112(%r8)
        adcq    120(%r9), %rax
        movq    %rax, 120(%r8)
        adcq    $0x00, %rcx
        movq    %rcx, 656(%rsp)
        /* 0(%rsp) = square of the low 16 words of (a0 + a1). */
        movq    %r8, %rsi
        movq    %rsp, %rdi
  #ifndef __APPLE__
        callq   sp_2048_sqr_avx2_16@plt
  #else
        callq   _sp_2048_sqr_avx2_16
  #endif /* __APPLE__ */
        /* 256(%rsp) = a1^2. */
        movq    648(%rsp), %rsi
        leaq    256(%rsp), %rdi
        addq    $0x80, %rsi
  #ifndef __APPLE__
        callq   sp_2048_sqr_avx2_16@plt
  #else
        callq   _sp_2048_sqr_avx2_16
  #endif /* __APPLE__ */
        /* r[0..31] = a0^2. */
        movq    648(%rsp), %rsi
        movq    640(%rsp), %rdi
  #ifndef __APPLE__
        callq   sp_2048_sqr_avx2_16@plt
  #else
        callq   _sp_2048_sqr_avx2_16
  #endif /* __APPLE__ */
        /* Reload r from the frame: %rdi is a caller-saved register in the
         * System V AMD64 ABI, so it must not be assumed to survive the
         * call above. */
        movq    640(%rsp), %rdi
        /* Account for the carry c of a0 + a1. With s = low 16 words of
         * the sum: (c * 2^1024 + s)^2 = s^2 + 2*c*s * 2^1024 + c * 2^2048.
         * %r10 = 0 - c is an all-zero or all-one mask; pextq with it
         * yields either 0 or the unchanged word and leaves the flags
         * alone, so the add/adc doubling chain below stays intact.
         * r[32..47] = 2 * c * s, %rcx = c + carry out of the doubling. */
        movq    656(%rsp), %r10
        leaq    512(%rsp), %r8
        movq    %r10, %rcx
        negq    %r10
        movq    (%r8), %rdx
        pextq   %r10, %rdx, %rdx
        addq    %rdx, %rdx
        movq    8(%r8), %rax
        movq    %rdx, 256(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    16(%r8), %rdx
        movq    %rax, 264(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    24(%r8), %rax
        movq    %rdx, 272(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    32(%r8), %rdx
        movq    %rax, 280(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    40(%r8), %rax
        movq    %rdx, 288(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    48(%r8), %rdx
        movq    %rax, 296(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    56(%r8), %rax
        movq    %rdx, 304(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    64(%r8), %rdx
        movq    %rax, 312(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    72(%r8), %rax
        movq    %rdx, 320(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    80(%r8), %rdx
        movq    %rax, 328(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    88(%r8), %rax
        movq    %rdx, 336(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    96(%r8), %rdx
        movq    %rax, 344(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    104(%r8), %rax
        movq    %rdx, 352(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    112(%r8), %rdx
        movq    %rax, 360(%rdi)
        pextq   %r10, %rdx, %rdx
        adcq    %rdx, %rdx
        movq    120(%r8), %rax
        movq    %rdx, 368(%rdi)
        pextq   %r10, %rax, %rax
        adcq    %rax, %rax
        movq    %rax, 376(%rdi)
        adcq    $0x00, %rcx
        /* 0(%rsp) -= a1^2 (32 words); the borrow is taken from %rcx. */
        leaq    256(%rsp), %rsi
        movq    %rsp, %r8
        movq    (%r8), %rdx
        subq    (%rsi), %rdx
        movq    8(%r8), %rax
        movq    %rdx, (%r8)
        sbbq    8(%rsi), %rax
        movq    16(%r8), %rdx
        movq    %rax, 8(%r8)
        sbbq    16(%rsi), %rdx
        movq    24(%r8), %rax
        movq    %rdx, 16(%r8)
        sbbq    24(%rsi), %rax
        movq    32(%r8), %rdx
        movq    %rax, 24(%r8)
        sbbq    32(%rsi), %rdx
        movq    40(%r8), %rax
        movq    %rdx, 32(%r8)
        sbbq    40(%rsi), %rax
        movq    48(%r8), %rdx
        movq    %rax, 40(%r8)
        sbbq    48(%rsi), %rdx
        movq    56(%r8), %rax
        movq    %rdx, 48(%r8)
        sbbq    56(%rsi), %rax
        movq    64(%r8), %rdx
        movq    %rax, 56(%r8)
        sbbq    64(%rsi), %rdx
        movq    72(%r8), %rax
        movq    %rdx, 64(%r8)
        sbbq    72(%rsi), %rax
        movq    80(%r8), %rdx
        movq    %rax, 72(%r8)
        sbbq    80(%rsi), %rdx
        movq    88(%r8), %rax
        movq    %rdx, 80(%r8)
        sbbq    88(%rsi), %rax
        movq    96(%r8), %rdx
        movq    %rax, 88(%r8)
        sbbq    96(%rsi), %rdx
        movq    104(%r8), %rax
        movq    %rdx, 96(%r8)
        sbbq    104(%rsi), %rax
        movq    112(%r8), %rdx
        movq    %rax, 104(%r8)
        sbbq    112(%rsi), %rdx
        movq    120(%r8), %rax
        movq    %rdx, 112(%r8)
        sbbq    120(%rsi), %rax
        movq    128(%r8), %rdx
        movq    %rax, 120(%r8)
        sbbq    128(%rsi), %rdx
        movq    136(%r8), %rax
        movq    %rdx, 128(%r8)
        sbbq    136(%rsi), %rax
        movq    144(%r8), %rdx
        movq    %rax, 136(%r8)
        sbbq    144(%rsi), %rdx
        movq    152(%r8), %rax
        movq    %rdx, 144(%r8)
        sbbq    152(%rsi), %rax
        movq    160(%r8), %rdx
        movq    %rax, 152(%r8)
        sbbq    160(%rsi), %rdx
        movq    168(%r8), %rax
        movq    %rdx, 160(%r8)
        sbbq    168(%rsi), %rax
        movq    176(%r8), %rdx
        movq    %rax, 168(%r8)
        sbbq    176(%rsi), %rdx
        movq    184(%r8), %rax
        movq    %rdx, 176(%r8)
        sbbq    184(%rsi), %rax
        movq    192(%r8), %rdx
        movq    %rax, 184(%r8)
        sbbq    192(%rsi), %rdx
        movq    200(%r8), %rax
        movq    %rdx, 192(%r8)
        sbbq    200(%rsi), %rax
        movq    208(%r8), %rdx
        movq    %rax, 200(%r8)
        sbbq    208(%rsi), %rdx
        movq    216(%r8), %rax
        movq    %rdx, 208(%r8)
        sbbq    216(%rsi), %rax
        movq    224(%r8), %rdx
        movq    %rax, 216(%r8)
        sbbq    224(%rsi), %rdx
        movq    232(%r8), %rax
        movq    %rdx, 224(%r8)
        sbbq    232(%rsi), %rax
        movq    240(%r8), %rdx
        movq    %rax, 232(%r8)
        sbbq    240(%rsi), %rdx
        movq    248(%r8), %rax
        movq    %rdx, 240(%r8)
        sbbq    248(%rsi), %rax
        movq    %rax, 248(%r8)
        sbbq    $0x00, %rcx
        /* 0(%rsp) -= a0^2 (32 words, read from r); borrow taken from
         * %rcx. Together with %rcx the buffer now holds the middle
         * Karatsuba term. */
        movq    (%r8), %rdx
        subq    (%rdi), %rdx
        movq    8(%r8), %rax
        movq    %rdx, (%r8)
        sbbq    8(%rdi), %rax
        movq    16(%r8), %rdx
        movq    %rax, 8(%r8)
        sbbq    16(%rdi), %rdx
        movq    24(%r8), %rax
        movq    %rdx, 16(%r8)
        sbbq    24(%rdi), %rax
        movq    32(%r8), %rdx
        movq    %rax, 24(%r8)
        sbbq    32(%rdi), %rdx
        movq    40(%r8), %rax
        movq    %rdx, 32(%r8)
        sbbq    40(%rdi), %rax
        movq    48(%r8), %rdx
        movq    %rax, 40(%r8)
        sbbq    48(%rdi), %rdx
        movq    56(%r8), %rax
        movq    %rdx, 48(%r8)
        sbbq    56(%rdi), %rax
        movq    64(%r8), %rdx
        movq    %rax, 56(%r8)
        sbbq    64(%rdi), %rdx
        movq    72(%r8), %rax
        movq    %rdx, 64(%r8)
        sbbq    72(%rdi), %rax
        movq    80(%r8), %rdx
        movq    %rax, 72(%r8)
        sbbq    80(%rdi), %rdx
        movq    88(%r8), %rax
        movq    %rdx, 80(%r8)
        sbbq    88(%rdi), %rax
        movq    96(%r8), %rdx
        movq    %rax, 88(%r8)
        sbbq    96(%rdi), %rdx
        movq    104(%r8), %rax
        movq    %rdx, 96(%r8)
        sbbq    104(%rdi), %rax
        movq    112(%r8), %rdx
        movq    %rax, 104(%r8)
        sbbq    112(%rdi), %rdx
        movq    120(%r8), %rax
        movq    %rdx, 112(%r8)
        sbbq    120(%rdi), %rax
        movq    128(%r8), %rdx
        movq    %rax, 120(%r8)
        sbbq    128(%rdi), %rdx
        movq    136(%r8), %rax
        movq    %rdx, 128(%r8)
        sbbq    136(%rdi), %rax
        movq    144(%r8), %rdx
        movq    %rax, 136(%r8)
        sbbq    144(%rdi), %rdx
        movq    152(%r8), %rax
        movq    %rdx, 144(%r8)
        sbbq    152(%rdi), %rax
        movq    160(%r8), %rdx
        movq    %rax, 152(%r8)
        sbbq    160(%rdi), %rdx
        movq    168(%r8), %rax
        movq    %rdx, 160(%r8)
        sbbq    168(%rdi), %rax
        movq    176(%r8), %rdx
        movq    %rax, 168(%r8)
        sbbq    176(%rdi), %rdx
        movq    184(%r8), %rax
        movq    %rdx, 176(%r8)
        sbbq    184(%rdi), %rax
        movq    192(%r8), %rdx
        movq    %rax, 184(%r8)
        sbbq    192(%rdi), %rdx
        movq    200(%r8), %rax
        movq    %rdx, 192(%r8)
        sbbq    200(%rdi), %rax
        movq    208(%r8), %rdx
        movq    %rax, 200(%r8)
        sbbq    208(%rdi), %rdx
        movq    216(%r8), %rax
        movq    %rdx, 208(%r8)
        sbbq    216(%rdi), %rax
        movq    224(%r8), %rdx
        movq    %rax, 216(%r8)
        sbbq    224(%rdi), %rdx
        movq    232(%r8), %rax
        movq    %rdx, 224(%r8)
        sbbq    232(%rdi), %rax
        movq    240(%r8), %rdx
        movq    %rax, 232(%r8)
        sbbq    240(%rdi), %rdx
        movq    248(%r8), %rax
        movq    %rdx, 240(%r8)
        sbbq    248(%rdi), %rax
        movq    %rax, 248(%r8)
        sbbq    $0x00, %rcx
        /* r[16..47] += middle term (32 words); the accumulated carry and
         * borrow count in %rcx becomes r[48]. */
        # Add in place
        movq    128(%rdi), %rdx
        addq    (%r8), %rdx
        movq    136(%rdi), %rax
        movq    %rdx, 128(%rdi)
        adcq    8(%r8), %rax
        movq    144(%rdi), %rdx
        movq    %rax, 136(%rdi)
        adcq    16(%r8), %rdx
        movq    152(%rdi), %rax
        movq    %rdx, 144(%rdi)
        adcq    24(%r8), %rax
        movq    160(%rdi), %rdx
        movq    %rax, 152(%rdi)
        adcq    32(%r8), %rdx
        movq    168(%rdi), %rax
        movq    %rdx, 160(%rdi)
        adcq    40(%r8), %rax
        movq    176(%rdi), %rdx
        movq    %rax, 168(%rdi)
        adcq    48(%r8), %rdx
        movq    184(%rdi), %rax
        movq    %rdx, 176(%rdi)
        adcq    56(%r8), %rax
        movq    192(%rdi), %rdx
        movq    %rax, 184(%rdi)
        adcq    64(%r8), %rdx
        movq    200(%rdi), %rax
        movq    %rdx, 192(%rdi)
        adcq    72(%r8), %rax
        movq    208(%rdi), %rdx
        movq    %rax, 200(%rdi)
        adcq    80(%r8), %rdx
        movq    216(%rdi), %rax
        movq    %rdx, 208(%rdi)
        adcq    88(%r8), %rax
        movq    224(%rdi), %rdx
        movq    %rax, 216(%rdi)
        adcq    96(%r8), %rdx
        movq    232(%rdi), %rax
        movq    %rdx, 224(%rdi)
        adcq    104(%r8), %rax
        movq    240(%rdi), %rdx
        movq    %rax, 232(%rdi)
        adcq    112(%r8), %rdx
        movq    248(%rdi), %rax
        movq    %rdx, 240(%rdi)
        adcq    120(%r8), %rax
        movq    256(%rdi), %rdx
        movq    %rax, 248(%rdi)
        adcq    128(%r8), %rdx
        movq    264(%rdi), %rax
        movq    %rdx, 256(%rdi)
        adcq    136(%r8), %rax
        movq    272(%rdi), %rdx
        movq    %rax, 264(%rdi)
        adcq    144(%r8), %rdx
        movq    280(%rdi), %rax
        movq    %rdx, 272(%rdi)
        adcq    152(%r8), %rax
        movq    288(%rdi), %rdx
        movq    %rax, 280(%rdi)
        adcq    160(%r8), %rdx
        movq    296(%rdi), %rax
        movq    %rdx, 288(%rdi)
        adcq    168(%r8), %rax
        movq    304(%rdi), %rdx
        movq    %rax, 296(%rdi)
        adcq    176(%r8), %rdx
        movq    312(%rdi), %rax
        movq    %rdx, 304(%rdi)
        adcq    184(%r8), %rax
        movq    320(%rdi), %rdx
        movq    %rax, 312(%rdi)
        adcq    192(%r8), %rdx
        movq    328(%rdi), %rax
        movq    %rdx, 320(%rdi)
        adcq    200(%r8), %rax
        movq    336(%rdi), %rdx
        movq    %rax, 328(%rdi)
        adcq    208(%r8), %rdx
        movq    344(%rdi), %rax
        movq    %rdx, 336(%rdi)
        adcq    216(%r8), %rax
        movq    352(%rdi), %rdx
        movq    %rax, 344(%rdi)
        adcq    224(%r8), %rdx
        movq    360(%rdi), %rax
        movq    %rdx, 352(%rdi)
        adcq    232(%r8), %rax
        movq    368(%rdi), %rdx
        movq    %rax, 360(%rdi)
        adcq    240(%r8), %rdx
        movq    376(%rdi), %rax
        movq    %rdx, 368(%rdi)
        adcq    248(%r8), %rax
        movq    %rax, 376(%rdi)
        adcq    $0x00, %rcx
        movq    %rcx, 384(%rdi)
        /* r[32..48] += low 17 words of a1^2 (%rsi still points at
         * 256(%rsp)). */
        # Add in place
        movq    256(%rdi), %rdx
        addq    (%rsi), %rdx
        movq    264(%rdi), %rax
        movq    %rdx, 256(%rdi)
        adcq    8(%rsi), %rax
        movq    272(%rdi), %rdx
        movq    %rax, 264(%rdi)
        adcq    16(%rsi), %rdx
        movq    280(%rdi), %rax
        movq    %rdx, 272(%rdi)
        adcq    24(%rsi), %rax
        movq    288(%rdi), %rdx
        movq    %rax, 280(%rdi)
        adcq    32(%rsi), %rdx
        movq    296(%rdi), %rax
        movq    %rdx, 288(%rdi)
        adcq    40(%rsi), %rax
        movq    304(%rdi), %rdx
        movq    %rax, 296(%rdi)
        adcq    48(%rsi), %rdx
        movq    312(%rdi), %rax
        movq    %rdx, 304(%rdi)
        adcq    56(%rsi), %rax
        movq    320(%rdi), %rdx
        movq    %rax, 312(%rdi)
        adcq    64(%rsi), %rdx
        movq    328(%rdi), %rax
        movq    %rdx, 320(%rdi)
        adcq    72(%rsi), %rax
        movq    336(%rdi), %rdx
        movq    %rax, 328(%rdi)
        adcq    80(%rsi), %rdx
        movq    344(%rdi), %rax
        movq    %rdx, 336(%rdi)
        adcq    88(%rsi), %rax
        movq    352(%rdi), %rdx
        movq    %rax, 344(%rdi)
        adcq    96(%rsi), %rdx
        movq    360(%rdi), %rax
        movq    %rdx, 352(%rdi)
        adcq    104(%rsi), %rax
        movq    368(%rdi), %rdx
        movq    %rax, 360(%rdi)
        adcq    112(%rsi), %rdx
        movq    376(%rdi), %rax
        movq    %rdx, 368(%rdi)
        adcq    120(%rsi), %rax
        movq    384(%rdi), %rdx
        movq    %rax, 376(%rdi)
        adcq    128(%rsi), %rdx
        movq    %rdx, 384(%rdi)
        /* r[49..63] = high 15 words of a1^2 plus the carry out of r[48].
         * CF from the adcq above must reach the first adcq below
         * unchanged: only movq (which leaves the flags alone) may sit in
         * between. Folding the carry into a scratch register at this
         * point would clear CF and silently drop the carry. */
        # Add to zero
        movq    136(%rsi), %rdx
        adcq    $0x00, %rdx
        movq    144(%rsi), %rax
        movq    %rdx, 392(%rdi)
        adcq    $0x00, %rax
        movq    152(%rsi), %rdx
        movq    %rax, 400(%rdi)
        adcq    $0x00, %rdx
        movq    160(%rsi), %rax
        movq    %rdx, 408(%rdi)
        adcq    $0x00, %rax
        movq    168(%rsi), %rdx
        movq    %rax, 416(%rdi)
        adcq    $0x00, %rdx
        movq    176(%rsi), %rax
        movq    %rdx, 424(%rdi)
        adcq    $0x00, %rax
        movq    184(%rsi), %rdx
        movq    %rax, 432(%rdi)
        adcq    $0x00, %rdx
        movq    192(%rsi), %rax
        movq    %rdx, 440(%rdi)
        adcq    $0x00, %rax
        movq    200(%rsi), %rdx
        movq    %rax, 448(%rdi)
        adcq    $0x00, %rdx
        movq    208(%rsi), %rax
        movq    %rdx, 456(%rdi)
        adcq    $0x00, %rax
        movq    216(%rsi), %rdx
        movq    %rax, 464(%rdi)
        adcq    $0x00, %rdx
        movq    224(%rsi), %rax
        movq    %rdx, 472(%rdi)
        adcq    $0x00, %rax
        movq    232(%rsi), %rdx
        movq    %rax, 480(%rdi)
        adcq    $0x00, %rdx
        movq    240(%rsi), %rax
        movq    %rdx, 488(%rdi)
        adcq    $0x00, %rax
        movq    248(%rsi), %rdx
        movq    %rax, 496(%rdi)
        adcq    $0x00, %rdx
        movq    %rdx, 504(%rdi)
        addq    $0x298, %rsp
        repz retq
  #ifndef __APPLE__
  .size sp_2048_sqr_avx2_32,.-sp_2048_sqr_avx2_32
  #endif /* __APPLE__ */
  8799. /* Mul a by digit b into r. (r = a * b)
  8800. *
  8801. * r A single precision integer.
  8802. * a A single precision integer.
  8803. * b A single precision digit.
  8804. */
  8805. #ifndef __APPLE__
  8806. .text
  8807. .globl sp_2048_mul_d_32
  8808. .type sp_2048_mul_d_32,@function
  8809. .align 16
  8810. sp_2048_mul_d_32:
  8811. #else
  8812. .section __TEXT,__text
  8813. .globl _sp_2048_mul_d_32
  8814. .p2align 4
  8815. _sp_2048_mul_d_32:
  8816. #endif /* __APPLE__ */
  8817. movq %rdx, %rcx
  8818. # A[0] * B
  8819. movq %rcx, %rax
  8820. xorq %r10, %r10
  8821. mulq (%rsi)
  8822. movq %rax, %r8
  8823. movq %rdx, %r9
  8824. movq %r8, (%rdi)
  8825. # A[1] * B
  8826. movq %rcx, %rax
  8827. xorq %r8, %r8
  8828. mulq 8(%rsi)
  8829. addq %rax, %r9
  8830. movq %r9, 8(%rdi)
  8831. adcq %rdx, %r10
  8832. adcq $0x00, %r8
  8833. # A[2] * B
  8834. movq %rcx, %rax
  8835. xorq %r9, %r9
  8836. mulq 16(%rsi)
  8837. addq %rax, %r10
  8838. movq %r10, 16(%rdi)
  8839. adcq %rdx, %r8
  8840. adcq $0x00, %r9
  8841. # A[3] * B
  8842. movq %rcx, %rax
  8843. xorq %r10, %r10
  8844. mulq 24(%rsi)
  8845. addq %rax, %r8
  8846. movq %r8, 24(%rdi)
  8847. adcq %rdx, %r9
  8848. adcq $0x00, %r10
  8849. # A[4] * B
  8850. movq %rcx, %rax
  8851. xorq %r8, %r8
  8852. mulq 32(%rsi)
  8853. addq %rax, %r9
  8854. movq %r9, 32(%rdi)
  8855. adcq %rdx, %r10
  8856. adcq $0x00, %r8
  8857. # A[5] * B
  8858. movq %rcx, %rax
  8859. xorq %r9, %r9
  8860. mulq 40(%rsi)
  8861. addq %rax, %r10
  8862. movq %r10, 40(%rdi)
  8863. adcq %rdx, %r8
  8864. adcq $0x00, %r9
  8865. # A[6] * B
  8866. movq %rcx, %rax
  8867. xorq %r10, %r10
  8868. mulq 48(%rsi)
  8869. addq %rax, %r8
  8870. movq %r8, 48(%rdi)
  8871. adcq %rdx, %r9
  8872. adcq $0x00, %r10
  8873. # A[7] * B
  8874. movq %rcx, %rax
  8875. xorq %r8, %r8
  8876. mulq 56(%rsi)
  8877. addq %rax, %r9
  8878. movq %r9, 56(%rdi)
  8879. adcq %rdx, %r10
  8880. adcq $0x00, %r8
  8881. # A[8] * B
  8882. movq %rcx, %rax
  8883. xorq %r9, %r9
  8884. mulq 64(%rsi)
  8885. addq %rax, %r10
  8886. movq %r10, 64(%rdi)
  8887. adcq %rdx, %r8
  8888. adcq $0x00, %r9
  8889. # A[9] * B
  8890. movq %rcx, %rax
  8891. xorq %r10, %r10
  8892. mulq 72(%rsi)
  8893. addq %rax, %r8
  8894. movq %r8, 72(%rdi)
  8895. adcq %rdx, %r9
  8896. adcq $0x00, %r10
  8897. # A[10] * B
  8898. movq %rcx, %rax
  8899. xorq %r8, %r8
  8900. mulq 80(%rsi)
  8901. addq %rax, %r9
  8902. movq %r9, 80(%rdi)
  8903. adcq %rdx, %r10
  8904. adcq $0x00, %r8
  8905. # A[11] * B
  8906. movq %rcx, %rax
  8907. xorq %r9, %r9
  8908. mulq 88(%rsi)
  8909. addq %rax, %r10
  8910. movq %r10, 88(%rdi)
  8911. adcq %rdx, %r8
  8912. adcq $0x00, %r9
  8913. # A[12] * B
  8914. movq %rcx, %rax
  8915. xorq %r10, %r10
  8916. mulq 96(%rsi)
  8917. addq %rax, %r8
  8918. movq %r8, 96(%rdi)
  8919. adcq %rdx, %r9
  8920. adcq $0x00, %r10
  8921. # A[13] * B
  8922. movq %rcx, %rax
  8923. xorq %r8, %r8
  8924. mulq 104(%rsi)
  8925. addq %rax, %r9
  8926. movq %r9, 104(%rdi)
  8927. adcq %rdx, %r10
  8928. adcq $0x00, %r8
  8929. # A[14] * B
  8930. movq %rcx, %rax
  8931. xorq %r9, %r9
  8932. mulq 112(%rsi)
  8933. addq %rax, %r10
  8934. movq %r10, 112(%rdi)
  8935. adcq %rdx, %r8
  8936. adcq $0x00, %r9
  8937. # A[15] * B
  8938. movq %rcx, %rax
  8939. xorq %r10, %r10
  8940. mulq 120(%rsi)
  8941. addq %rax, %r8
  8942. movq %r8, 120(%rdi)
  8943. adcq %rdx, %r9
  8944. adcq $0x00, %r10
  8945. # A[16] * B
  8946. movq %rcx, %rax
  8947. xorq %r8, %r8
  8948. mulq 128(%rsi)
  8949. addq %rax, %r9
  8950. movq %r9, 128(%rdi)
  8951. adcq %rdx, %r10
  8952. adcq $0x00, %r8
  8953. # A[17] * B
  8954. movq %rcx, %rax
  8955. xorq %r9, %r9
  8956. mulq 136(%rsi)
  8957. addq %rax, %r10
  8958. movq %r10, 136(%rdi)
  8959. adcq %rdx, %r8
  8960. adcq $0x00, %r9
  8961. # A[18] * B
  8962. movq %rcx, %rax
  8963. xorq %r10, %r10
  8964. mulq 144(%rsi)
  8965. addq %rax, %r8
  8966. movq %r8, 144(%rdi)
  8967. adcq %rdx, %r9
  8968. adcq $0x00, %r10
  8969. # A[19] * B
  8970. movq %rcx, %rax
  8971. xorq %r8, %r8
  8972. mulq 152(%rsi)
  8973. addq %rax, %r9
  8974. movq %r9, 152(%rdi)
  8975. adcq %rdx, %r10
  8976. adcq $0x00, %r8
  8977. # A[20] * B
  8978. movq %rcx, %rax
  8979. xorq %r9, %r9
  8980. mulq 160(%rsi)
  8981. addq %rax, %r10
  8982. movq %r10, 160(%rdi)
  8983. adcq %rdx, %r8
  8984. adcq $0x00, %r9
  8985. # A[21] * B
  8986. movq %rcx, %rax
  8987. xorq %r10, %r10
  8988. mulq 168(%rsi)
  8989. addq %rax, %r8
  8990. movq %r8, 168(%rdi)
  8991. adcq %rdx, %r9
  8992. adcq $0x00, %r10
  8993. # A[22] * B
  8994. movq %rcx, %rax
  8995. xorq %r8, %r8
  8996. mulq 176(%rsi)
  8997. addq %rax, %r9
  8998. movq %r9, 176(%rdi)
  8999. adcq %rdx, %r10
  9000. adcq $0x00, %r8
  9001. # A[23] * B
  9002. movq %rcx, %rax
  9003. xorq %r9, %r9
  9004. mulq 184(%rsi)
  9005. addq %rax, %r10
  9006. movq %r10, 184(%rdi)
  9007. adcq %rdx, %r8
  9008. adcq $0x00, %r9
  9009. # A[24] * B
  9010. movq %rcx, %rax
  9011. xorq %r10, %r10
  9012. mulq 192(%rsi)
  9013. addq %rax, %r8
  9014. movq %r8, 192(%rdi)
  9015. adcq %rdx, %r9
  9016. adcq $0x00, %r10
  9017. # A[25] * B
  9018. movq %rcx, %rax
  9019. xorq %r8, %r8
  9020. mulq 200(%rsi)
  9021. addq %rax, %r9
  9022. movq %r9, 200(%rdi)
  9023. adcq %rdx, %r10
  9024. adcq $0x00, %r8
  9025. # A[26] * B
  9026. movq %rcx, %rax
  9027. xorq %r9, %r9
  9028. mulq 208(%rsi)
  9029. addq %rax, %r10
  9030. movq %r10, 208(%rdi)
  9031. adcq %rdx, %r8
  9032. adcq $0x00, %r9
  9033. # A[27] * B
  9034. movq %rcx, %rax
  9035. xorq %r10, %r10
  9036. mulq 216(%rsi)
  9037. addq %rax, %r8
  9038. movq %r8, 216(%rdi)
  9039. adcq %rdx, %r9
  9040. adcq $0x00, %r10
  9041. # A[28] * B
  9042. movq %rcx, %rax
  9043. xorq %r8, %r8
  9044. mulq 224(%rsi)
  9045. addq %rax, %r9
  9046. movq %r9, 224(%rdi)
  9047. adcq %rdx, %r10
  9048. adcq $0x00, %r8
  9049. # A[29] * B
  9050. movq %rcx, %rax
  9051. xorq %r9, %r9
  9052. mulq 232(%rsi)
  9053. addq %rax, %r10
  9054. movq %r10, 232(%rdi)
  9055. adcq %rdx, %r8
  9056. adcq $0x00, %r9
  9057. # A[30] * B
  9058. movq %rcx, %rax
  9059. xorq %r10, %r10
  9060. mulq 240(%rsi)
  9061. addq %rax, %r8
  9062. movq %r8, 240(%rdi)
  9063. adcq %rdx, %r9
  9064. adcq $0x00, %r10
  9065. # A[31] * B
  9066. movq %rcx, %rax
  9067. mulq 248(%rsi)
  9068. addq %rax, %r9
  9069. adcq %rdx, %r10
  9070. movq %r9, 248(%rdi)
  9071. movq %r10, 256(%rdi)
  9072. repz retq
  9073. #ifndef __APPLE__
  9074. .size sp_2048_mul_d_32,.-sp_2048_mul_d_32
  9075. #endif /* __APPLE__ */
  /* In-place subtraction of two 16-limb (16 x 64-bit) numbers: a -= b.
   *
   * a  Pointer in %rdi. Minuend on entry, difference on exit.
   * b  Pointer in %rsi. Subtrahend, only read.
   *
   * On return %rax is 0 when no borrow left the top limb and all ones (-1)
   * when one did. %rcx, %rdx and the flags are overwritten.
   */
  /* One limb of the borrow chain: fetch a[off] into reg, write the previous
   * difference (preg) back to a[poff], then subtract b[off] with borrow.
   * Only mov and sbb are used so the borrow flag survives between limbs.
   */
  .macro SP2048_SUBIP16_LIMB off, reg, poff, preg
          movq    \off(%rdi), \reg
          movq    \preg, \poff(%rdi)
          sbbq    \off(%rsi), \reg
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_2048_sub_in_place_16
  .type sp_2048_sub_in_place_16,@function
  .align 16
  sp_2048_sub_in_place_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_sub_in_place_16
  .p2align 4
  _sp_2048_sub_in_place_16:
  #endif /* __APPLE__ */
          /* Borrow accumulator; cleared before the chain starts. */
          xorq    %rax, %rax
          /* Limb 0 opens the chain with a plain subtract. */
          movq    (%rdi), %rdx
          subq    (%rsi), %rdx
          /* Limbs 1..15, alternating between %rcx and %rdx. */
          SP2048_SUBIP16_LIMB 8, %rcx, 0, %rdx
          SP2048_SUBIP16_LIMB 16, %rdx, 8, %rcx
          SP2048_SUBIP16_LIMB 24, %rcx, 16, %rdx
          SP2048_SUBIP16_LIMB 32, %rdx, 24, %rcx
          SP2048_SUBIP16_LIMB 40, %rcx, 32, %rdx
          SP2048_SUBIP16_LIMB 48, %rdx, 40, %rcx
          SP2048_SUBIP16_LIMB 56, %rcx, 48, %rdx
          SP2048_SUBIP16_LIMB 64, %rdx, 56, %rcx
          SP2048_SUBIP16_LIMB 72, %rcx, 64, %rdx
          SP2048_SUBIP16_LIMB 80, %rdx, 72, %rcx
          SP2048_SUBIP16_LIMB 88, %rcx, 80, %rdx
          SP2048_SUBIP16_LIMB 96, %rdx, 88, %rcx
          SP2048_SUBIP16_LIMB 104, %rcx, 96, %rdx
          SP2048_SUBIP16_LIMB 112, %rdx, 104, %rcx
          SP2048_SUBIP16_LIMB 120, %rcx, 112, %rdx
          movq    %rcx, 120(%rdi)
          /* 0 - borrow: turns the final borrow into 0 or -1. */
          sbbq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_2048_sub_in_place_16,.-sp_2048_sub_in_place_16
  #endif /* __APPLE__ */
  .purgem SP2048_SUBIP16_LIMB
  /* Masked subtraction of 16-limb numbers: r = a - (b & m).
   * m is -1 (all ones) to subtract b and 0 to leave a unchanged.
   *
   * r  Pointer in %rdi. Receives the 16-limb result.
   * a  Pointer in %rsi. Number to subtract from.
   * b  Pointer in %rdx. Number to subtract.
   * m  Mask in %rcx, ANDed with every limb of b.
   *
   * On return %rax is 0 when no borrow left the top limb and -1 when one
   * did. The masked copy of b is staged in 128 bytes of stack first,
   * because and overwrites the carry flag and so cannot be interleaved
   * with the sbb chain. %rdx, %r8, %r9 and the flags are overwritten.
   */
  /* Stage two masked limbs of b, at byte offsets o0 and o1, on the stack. */
  .macro SP2048_CSUB16_MASK2 o0, o1
          movq    \o0(%rdx), %r8
          movq    \o1(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, \o0(%rsp)
          movq    %r9, \o1(%rsp)
  .endm
  /* One limb of the borrow chain: reg = a[off] - masked_b[off] - borrow,
   * then write the previous limb (preg) to r[poff].
   */
  .macro SP2048_CSUB16_LIMB off, reg, poff, preg
          movq    \off(%rsi), \reg
          movq    \off(%rsp), %rdx
          sbbq    %rdx, \reg
          movq    \preg, \poff(%rdi)
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_2048_cond_sub_16
  .type sp_2048_cond_sub_16,@function
  .align 16
  sp_2048_cond_sub_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_cond_sub_16
  .p2align 4
  _sp_2048_cond_sub_16:
  #endif /* __APPLE__ */
          subq    $0x80, %rsp
          movq    $0x00, %rax
          /* Pass 1: (b & m) -> stack scratch, two limbs at a time. */
          SP2048_CSUB16_MASK2 0, 8
          SP2048_CSUB16_MASK2 16, 24
          SP2048_CSUB16_MASK2 32, 40
          SP2048_CSUB16_MASK2 48, 56
          SP2048_CSUB16_MASK2 64, 72
          SP2048_CSUB16_MASK2 80, 88
          SP2048_CSUB16_MASK2 96, 104
          SP2048_CSUB16_MASK2 112, 120
          /* Pass 2: r = a - scratch. From here %rdx is a temporary; the
           * pointer to b is no longer needed.
           */
          movq    (%rsi), %r8
          movq    (%rsp), %rdx
          subq    %rdx, %r8
          SP2048_CSUB16_LIMB 8, %r9, 0, %r8
          SP2048_CSUB16_LIMB 16, %r8, 8, %r9
          SP2048_CSUB16_LIMB 24, %r9, 16, %r8
          SP2048_CSUB16_LIMB 32, %r8, 24, %r9
          SP2048_CSUB16_LIMB 40, %r9, 32, %r8
          SP2048_CSUB16_LIMB 48, %r8, 40, %r9
          SP2048_CSUB16_LIMB 56, %r9, 48, %r8
          SP2048_CSUB16_LIMB 64, %r8, 56, %r9
          SP2048_CSUB16_LIMB 72, %r9, 64, %r8
          SP2048_CSUB16_LIMB 80, %r8, 72, %r9
          SP2048_CSUB16_LIMB 88, %r9, 80, %r8
          SP2048_CSUB16_LIMB 96, %r8, 88, %r9
          SP2048_CSUB16_LIMB 104, %r9, 96, %r8
          SP2048_CSUB16_LIMB 112, %r8, 104, %r9
          SP2048_CSUB16_LIMB 120, %r9, 112, %r8
          movq    %r9, 120(%rdi)
          /* 0 - borrow: 0 or -1. Must precede the add, which sets flags. */
          sbbq    $0x00, %rax
          addq    $0x80, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_2048_cond_sub_16,.-sp_2048_cond_sub_16
  #endif /* __APPLE__ */
  .purgem SP2048_CSUB16_MASK2
  .purgem SP2048_CSUB16_LIMB
  /* Reduce the number back to 2048 bits using Montgomery reduction.
   *
   * a   Pointer in %rdi. Number to reduce in place. Limbs a[0]..a[31] are
   *     read and updated; the final masked subtraction writes a[0]..a[15].
   * m   Pointer in %rsi. The 16-limb modulus.
   * mp  Digit in %rdx. The negative inverse of m mod 2^n.
   *
   * Register use inside the loop:
   *   %rcx      mp
   *   %r8       iterations left (16 down to 1)
   *   %r11      mu for the current iteration
   *   %r13/%r14 the two lowest live limbs, kept in registers across
   *             iterations instead of being stored and reloaded
   *   %r9/%r10  alternating carry words between limb steps
   *   %r12      scratch for limbs 3..15
   *   %r15      carry out of the top limb, handed to the next iteration
   *
   * After the loop %r15 (0 or 1) is negated into a 0 / -1 mask and
   * sp_2048_cond_sub_16 subtracts (m & mask) from the upper 16 limbs,
   * storing the result in the lower 16 limbs.
   *
   * %r12-%r15 are callee-saved and are pushed/popped here. With the return
   * address that leaves %rsp at 8 mod 16, so 8 bytes of padding are added
   * around the call to keep the stack 16-byte aligned at the call site, as
   * the System V AMD64 ABI requires.
   */
  /* a[i+k] += m[k] * mu for a middle limb (k = 3..14) at byte offset off.
   * cin is the carry word from the previous limb; cout receives the carry
   * for the next one.
   */
  .macro SP2048_MONT16_STEP off, cin, cout
          movq    %r11, %rax
          xorq    \cout, \cout
          mulq    \off(%rsi)
          movq    \off(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, \cout
          addq    \cin, %r12
          movq    %r12, \off(%rdi)
          adcq    $0x00, \cout
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_2048_mont_reduce_16
  .type sp_2048_mont_reduce_16,@function
  .align 16
  sp_2048_mont_reduce_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_mont_reduce_16
  .p2align 4
  _sp_2048_mont_reduce_16:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          /* mulq needs %rdx, so mp moves to %rcx. */
          movq    %rdx, %rcx
          xorq    %r15, %r15
          /* i = 16 */
          movq    $16, %r8
          movq    (%rdi), %r13
          movq    8(%rdi), %r14
  L_mont_loop_16:
          /* mu = a[i] * mp */
          movq    %r13, %r11
          imulq   %rcx, %r11
          /* a[i+0] += m[0] * mu; only the carry word in %r10 is used after */
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    (%rsi)
          addq    %rax, %r13
          adcq    %rdx, %r10
          /* a[i+1] += m[1] * mu; becomes the next iteration's a[i] */
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    8(%rsi)
          movq    %r14, %r13
          addq    %rax, %r13
          adcq    %rdx, %r9
          addq    %r10, %r13
          adcq    $0x00, %r9
          /* a[i+2] += m[2] * mu; becomes the next iteration's a[i+1] */
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    16(%rsi)
          movq    16(%rdi), %r14
          addq    %rax, %r14
          adcq    %rdx, %r10
          addq    %r9, %r14
          adcq    $0x00, %r10
          /* a[i+3] .. a[i+14] += m[3] .. m[14] * mu */
          SP2048_MONT16_STEP 24, %r10, %r9
          SP2048_MONT16_STEP 32, %r9, %r10
          SP2048_MONT16_STEP 40, %r10, %r9
          SP2048_MONT16_STEP 48, %r9, %r10
          SP2048_MONT16_STEP 56, %r10, %r9
          SP2048_MONT16_STEP 64, %r9, %r10
          SP2048_MONT16_STEP 72, %r10, %r9
          SP2048_MONT16_STEP 80, %r9, %r10
          SP2048_MONT16_STEP 88, %r10, %r9
          SP2048_MONT16_STEP 96, %r9, %r10
          SP2048_MONT16_STEP 104, %r10, %r9
          SP2048_MONT16_STEP 112, %r9, %r10
          /* a[i+15] += m[15] * mu, folding in the carry saved in %r15 */
          movq    %r11, %rax
          mulq    120(%rsi)
          movq    120(%rdi), %r12
          addq    %rax, %r10
          adcq    %r15, %rdx
          /* mov, not xor: the carry from the adc above must survive. */
          movq    $0x00, %r15
          adcq    $0x00, %r15
          addq    %r10, %r12
          movq    %r12, 120(%rdi)
          adcq    %rdx, 128(%rdi)
          adcq    $0x00, %r15
          /* i -= 1; advance a by one limb */
          addq    $8, %rdi
          decq    %r8
          jnz     L_mont_loop_16
          /* %rdi now points at the upper half; write back the cached limbs. */
          movq    %r13, (%rdi)
          movq    %r14, 8(%rdi)
          /* Final carry (0 or 1) -> mask (0 or -1). */
          negq    %r15
          /* cond_sub(r = lower half, a = upper half, b = m, mask) */
          movq    %r15, %rcx
          movq    %rsi, %rdx
          movq    %rdi, %rsi
          subq    $0x80, %rdi
          /* Pad so %rsp is 16-byte aligned at the call. */
          subq    $0x08, %rsp
  #ifndef __APPLE__
          callq   sp_2048_cond_sub_16@plt
  #else
          callq   _sp_2048_cond_sub_16
  #endif /* __APPLE__ */
          addq    $0x08, %rsp
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size sp_2048_mont_reduce_16,.-sp_2048_mont_reduce_16
  #endif /* __APPLE__ */
  .purgem SP2048_MONT16_STEP
  /* Masked subtraction of 16-limb numbers: r = a - (b & m).
   * m is -1 (all ones) to subtract b and 0 to leave a unchanged.
   *
   * r  Pointer in %rdi. Receives the 16-limb result.
   * a  Pointer in %rsi. Number to subtract from.
   * b  Pointer in %rdx. Number to subtract.
   * m  Mask in %rcx.
   *
   * The mask is applied with pext (BMI2): an all-ones mask passes the limb
   * through, a zero mask yields 0. pext leaves the flags alone, so masking
   * can sit inside the sub/sbb borrow chain without a stack buffer.
   *
   * On return %rax is 0 when no borrow left the top limb and -1 when one
   * did. %r8, %r9, %r10 and the flags are overwritten.
   */
  /* One limb of the chain: tb = b[off] & m, ta = a[off] - tb - borrow, and
   * the previous limb (preg) is written to r[poff] in between. The three
   * registers rotate from step to step.
   */
  .macro SP2048_CSUBX16_LIMB off, tb, ta, poff, preg
          movq    \off(%rdx), \tb
          movq    \off(%rsi), \ta
          pextq   %rcx, \tb, \tb
          movq    \preg, \poff(%rdi)
          sbbq    \tb, \ta
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_2048_cond_sub_avx2_16
  .type sp_2048_cond_sub_avx2_16,@function
  .align 16
  sp_2048_cond_sub_avx2_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_cond_sub_avx2_16
  .p2align 4
  _sp_2048_cond_sub_avx2_16:
  #endif /* __APPLE__ */
          movq    $0x00, %rax
          /* Limb 0 opens the borrow chain with a plain subtract. */
          movq    (%rdx), %r10
          movq    (%rsi), %r8
          pextq   %rcx, %r10, %r10
          subq    %r10, %r8
          /* Limbs 1..15. */
          SP2048_CSUBX16_LIMB 8, %r10, %r9, 0, %r8
          SP2048_CSUBX16_LIMB 16, %r8, %r10, 8, %r9
          SP2048_CSUBX16_LIMB 24, %r9, %r8, 16, %r10
          SP2048_CSUBX16_LIMB 32, %r10, %r9, 24, %r8
          SP2048_CSUBX16_LIMB 40, %r8, %r10, 32, %r9
          SP2048_CSUBX16_LIMB 48, %r9, %r8, 40, %r10
          SP2048_CSUBX16_LIMB 56, %r10, %r9, 48, %r8
          SP2048_CSUBX16_LIMB 64, %r8, %r10, 56, %r9
          SP2048_CSUBX16_LIMB 72, %r9, %r8, 64, %r10
          SP2048_CSUBX16_LIMB 80, %r10, %r9, 72, %r8
          SP2048_CSUBX16_LIMB 88, %r8, %r10, 80, %r9
          SP2048_CSUBX16_LIMB 96, %r9, %r8, 88, %r10
          SP2048_CSUBX16_LIMB 104, %r10, %r9, 96, %r8
          SP2048_CSUBX16_LIMB 112, %r8, %r10, 104, %r9
          SP2048_CSUBX16_LIMB 120, %r9, %r8, 112, %r10
          movq    %r8, 120(%rdi)
          /* 0 - borrow: 0 or -1. */
          sbbq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_2048_cond_sub_avx2_16,.-sp_2048_cond_sub_avx2_16
  #endif /* __APPLE__ */
  .purgem SP2048_CSUBX16_LIMB
  /* Multiply a 16-limb number by one 64-bit digit: r = a * b.
   *
   * r  Pointer in %rdi. Receives 17 limbs (r[0]..r[16]).
   * a  Pointer in %rsi. The 16-limb multiplicand.
   * b  Digit in %rdx.
   *
   * b is moved to %rcx because mulq overwrites %rdx. %r8, %r9 and %r10
   * rotate as a three-word accumulator. %rax, %rcx, %rdx, %r8-%r10 and the
   * flags are overwritten.
   */
  /* Fold a[off] * b into the accumulator and emit one result limb:
   *   lo  += low word of the product   -> stored to r[off]
   *   mid += high word + carry
   *   hi   = carry out of mid (cleared first)
   */
  .macro SP2048_MULD16_STEP off, lo, mid, hi
          movq    %rcx, %rax
          xorq    \hi, \hi
          mulq    \off(%rsi)
          addq    %rax, \lo
          movq    \lo, \off(%rdi)
          adcq    %rdx, \mid
          adcq    $0x00, \hi
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_2048_mul_d_16
  .type sp_2048_mul_d_16,@function
  .align 16
  sp_2048_mul_d_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_mul_d_16
  .p2align 4
  _sp_2048_mul_d_16:
  #endif /* __APPLE__ */
          movq    %rdx, %rcx
          /* A[0] * B seeds the accumulator. */
          movq    %rcx, %rax
          xorq    %r10, %r10
          mulq    (%rsi)
          movq    %rax, %r8
          movq    %rdx, %r9
          movq    %r8, (%rdi)
          /* A[1] * B .. A[14] * B */
          SP2048_MULD16_STEP 8, %r9, %r10, %r8
          SP2048_MULD16_STEP 16, %r10, %r8, %r9
          SP2048_MULD16_STEP 24, %r8, %r9, %r10
          SP2048_MULD16_STEP 32, %r9, %r10, %r8
          SP2048_MULD16_STEP 40, %r10, %r8, %r9
          SP2048_MULD16_STEP 48, %r8, %r9, %r10
          SP2048_MULD16_STEP 56, %r9, %r10, %r8
          SP2048_MULD16_STEP 64, %r10, %r8, %r9
          SP2048_MULD16_STEP 72, %r8, %r9, %r10
          SP2048_MULD16_STEP 80, %r9, %r10, %r8
          SP2048_MULD16_STEP 88, %r10, %r8, %r9
          SP2048_MULD16_STEP 96, %r8, %r9, %r10
          SP2048_MULD16_STEP 104, %r9, %r10, %r8
          SP2048_MULD16_STEP 112, %r10, %r8, %r9
          /* A[15] * B: last product, the high word becomes r[16]. */
          movq    %rcx, %rax
          mulq    120(%rsi)
          addq    %rax, %r8
          adcq    %rdx, %r9
          movq    %r8, 120(%rdi)
          movq    %r9, 128(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_2048_mul_d_16,.-sp_2048_mul_d_16
  #endif /* __APPLE__ */
  .purgem SP2048_MULD16_STEP
  #ifdef HAVE_INTEL_AVX2
  /* Multiply a 16-limb number by one 64-bit digit: r = a * b.
   * Uses mulx with the adcx/adox dual carry chains.
   *
   * r  Pointer in %rdi. Receives 17 limbs (r[0]..r[16]).
   * a  Pointer in %rsi. The 16-limb multiplicand.
   * b  Digit in %rdx. It stays there as mulx's implicit operand.
   *
   * %r11 is held at zero. Clearing it also clears CF and OF, which starts
   * both carry chains. Low product words are added on the CF chain (adcx)
   * and high words on the OF chain (adox); %r9 and %r10 alternate as the
   * accumulator. %rax, %rcx, %r8-%r11 and the flags are overwritten.
   */
  /* Fold a[off] * b into the running sum:
   *   acc += low word  (CF chain) -> stored to r[off]
   *   nxt  = high word (OF chain), seeded from the zero in %r11
   */
  .macro SP2048_MULDX16_STEP off, acc, nxt
          mulxq   \off(%rsi), %rcx, %r8
          movq    %r11, \nxt
          adcxq   %rcx, \acc
          movq    \acc, \off(%rdi)
          adoxq   %r8, \nxt
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_2048_mul_d_avx2_16
  .type sp_2048_mul_d_avx2_16,@function
  .align 16
  sp_2048_mul_d_avx2_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_mul_d_avx2_16
  .p2align 4
  _sp_2048_mul_d_avx2_16:
  #endif /* __APPLE__ */
          movq    %rdx, %rax
          /* A[0] * B */
          movq    %rax, %rdx
          xorq    %r11, %r11
          mulxq   (%rsi), %r9, %r10
          movq    %r9, (%rdi)
          /* A[1] * B .. A[14] * B */
          SP2048_MULDX16_STEP 8, %r10, %r9
          SP2048_MULDX16_STEP 16, %r9, %r10
          SP2048_MULDX16_STEP 24, %r10, %r9
          SP2048_MULDX16_STEP 32, %r9, %r10
          SP2048_MULDX16_STEP 40, %r10, %r9
          SP2048_MULDX16_STEP 48, %r9, %r10
          SP2048_MULDX16_STEP 56, %r10, %r9
          SP2048_MULDX16_STEP 64, %r9, %r10
          SP2048_MULDX16_STEP 72, %r10, %r9
          SP2048_MULDX16_STEP 80, %r9, %r10
          SP2048_MULDX16_STEP 88, %r10, %r9
          SP2048_MULDX16_STEP 96, %r9, %r10
          SP2048_MULDX16_STEP 104, %r10, %r9
          SP2048_MULDX16_STEP 112, %r9, %r10
          /* A[15] * B: merge both carry chains into the top limb. */
          mulxq   120(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          adoxq   %r8, %r9
          adcxq   %r11, %r9
          movq    %r10, 120(%rdi)
          movq    %r9, 128(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_2048_mul_d_avx2_16,.-sp_2048_mul_d_avx2_16
  #endif /* __APPLE__ */
  .purgem SP2048_MULDX16_STEP
  #endif /* HAVE_INTEL_AVX2 */
  /* Compare two 16-limb numbers without branching on the data.
   *
   * a  Pointer in %rdi.
   * b  Pointer in %rsi.
   *
   * Returns in %rax a negative value, 0 or a positive value when a is less
   * than, equal to or greater than b respectively.
   *
   * Limbs are visited from most to least significant and all 16 are always
   * processed. %rdx is a mask that starts as all ones and is cleared at the
   * first limb pair that differs; later limbs are ANDed with that zero mask
   * so they compare equal and cannot change the result. %rax starts at -1
   * and is XORed with the mask at the end, which turns the "all equal"
   * case into 0 and leaves 1 or -1 untouched otherwise.
   *
   * %rcx, %rdx, %r8, %r9, %r10 and the flags are overwritten.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_cmp_16
  .type sp_2048_cmp_16,@function
  .align 16
  sp_2048_cmp_16:
  #else
  .section __TEXT,__text
  .globl _sp_2048_cmp_16
  .p2align 4
  _sp_2048_cmp_16:
  #endif /* __APPLE__ */
          /* %rcx = 0 (cleared mask), %rdx = live mask, %rax = result,
           * %r8 = 1 (the "greater" result).
           */
          xorq    %rcx, %rcx
          movq    $-1, %rdx
          movq    $-1, %rax
          movq    $0x01, %r8
  .irp off, 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0
          movq    \off(%rdi), %r9
          movq    \off(%rsi), %r10
          andq    %rdx, %r9
          andq    %rdx, %r10
          subq    %r10, %r9
          /* a limb above b limb -> 1; below -> the mask, which is -1 here
           * because a borrow can only occur while the mask is still set.
           */
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          /* Limbs differ: clear the mask for all lower limbs. */
          cmovnz  %rcx, %rdx
  .endr
          xorq    %rdx, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_2048_cmp_16,.-sp_2048_cmp_16
  #endif /* __APPLE__ */
  10033. #ifdef HAVE_INTEL_AVX2
  10034. /* Reduce the number back to 2048 bits using Montgomery reduction.
  10035. *
  10036. * a A single precision number to reduce in place.
  10037. * m The single precision number representing the modulus.
  10038. * mp The digit representing the negative inverse of m mod 2^n.
  10039. */
  10040. #ifndef __APPLE__
  10041. .text
  10042. .globl sp_2048_mont_reduce_avx2_16
  10043. .type sp_2048_mont_reduce_avx2_16,@function
  10044. .align 16
  10045. sp_2048_mont_reduce_avx2_16:
  10046. #else
  10047. .section __TEXT,__text
  10048. .globl _sp_2048_mont_reduce_avx2_16
  10049. .p2align 4
  10050. _sp_2048_mont_reduce_avx2_16:
  10051. #endif /* __APPLE__ */
  10052. pushq %r12
  10053. pushq %r13
  10054. pushq %r14
  10055. movq %rdx, %r8
  10056. xorq %r14, %r14
  10057. # i = 16
  10058. movq $16, %r9
  10059. movq (%rdi), %r13
  10060. addq $0x40, %rdi
  10061. xorq %r12, %r12
  10062. L_mont_loop_avx2_16:
  10063. # mu = a[i] * mp
  10064. movq %r13, %rdx
  10065. movq %r13, %r10
  10066. imulq %r8, %rdx
  10067. xorq %r12, %r12
  10068. # a[i+0] += m[0] * mu
  10069. mulxq (%rsi), %rax, %rcx
  10070. movq -56(%rdi), %r13
  10071. adcxq %rax, %r10
  10072. adoxq %rcx, %r13
  10073. # a[i+1] += m[1] * mu
  10074. mulxq 8(%rsi), %rax, %rcx
  10075. movq -48(%rdi), %r10
  10076. adcxq %rax, %r13
  10077. adoxq %rcx, %r10
  10078. # a[i+2] += m[2] * mu
  10079. mulxq 16(%rsi), %rax, %rcx
  10080. movq -40(%rdi), %r11
  10081. adcxq %rax, %r10
  10082. adoxq %rcx, %r11
  10083. movq %r10, -48(%rdi)
  10084. # a[i+3] += m[3] * mu
  10085. mulxq 24(%rsi), %rax, %rcx
  10086. movq -32(%rdi), %r10
  10087. adcxq %rax, %r11
  10088. adoxq %rcx, %r10
  10089. movq %r11, -40(%rdi)
  10090. # a[i+4] += m[4] * mu
  10091. mulxq 32(%rsi), %rax, %rcx
  10092. movq -24(%rdi), %r11
  10093. adcxq %rax, %r10
  10094. adoxq %rcx, %r11
  10095. movq %r10, -32(%rdi)
  10096. # a[i+5] += m[5] * mu
  10097. mulxq 40(%rsi), %rax, %rcx
  10098. movq -16(%rdi), %r10
  10099. adcxq %rax, %r11
  10100. adoxq %rcx, %r10
  10101. movq %r11, -24(%rdi)
  10102. # a[i+6] += m[6] * mu
  10103. mulxq 48(%rsi), %rax, %rcx
  10104. movq -8(%rdi), %r11
  10105. adcxq %rax, %r10
  10106. adoxq %rcx, %r11
  10107. movq %r10, -16(%rdi)
  10108. # a[i+7] += m[7] * mu
  10109. mulxq 56(%rsi), %rax, %rcx
  10110. movq (%rdi), %r10
  10111. adcxq %rax, %r11
  10112. adoxq %rcx, %r10
  10113. movq %r11, -8(%rdi)
  10114. # a[i+8] += m[8] * mu
  10115. mulxq 64(%rsi), %rax, %rcx
  10116. movq 8(%rdi), %r11
  10117. adcxq %rax, %r10
  10118. adoxq %rcx, %r11
  10119. movq %r10, (%rdi)
  10120. # a[i+9] += m[9] * mu
  10121. mulxq 72(%rsi), %rax, %rcx
  10122. movq 16(%rdi), %r10
  10123. adcxq %rax, %r11
  10124. adoxq %rcx, %r10
  10125. movq %r11, 8(%rdi)
  10126. # a[i+10] += m[10] * mu
  10127. mulxq 80(%rsi), %rax, %rcx
  10128. movq 24(%rdi), %r11
  10129. adcxq %rax, %r10
  10130. adoxq %rcx, %r11
  10131. movq %r10, 16(%rdi)
  10132. # a[i+11] += m[11] * mu
  10133. mulxq 88(%rsi), %rax, %rcx
  10134. movq 32(%rdi), %r10
  10135. adcxq %rax, %r11
  10136. adoxq %rcx, %r10
  10137. movq %r11, 24(%rdi)
  10138. # a[i+12] += m[12] * mu
  10139. mulxq 96(%rsi), %rax, %rcx
  10140. movq 40(%rdi), %r11
  10141. adcxq %rax, %r10
  10142. adoxq %rcx, %r11
  10143. movq %r10, 32(%rdi)
  10144. # a[i+13] += m[13] * mu
  10145. mulxq 104(%rsi), %rax, %rcx
  10146. movq 48(%rdi), %r10
  10147. adcxq %rax, %r11
  10148. adoxq %rcx, %r10
  10149. movq %r11, 40(%rdi)
  10150. # a[i+14] += m[14] * mu
  10151. mulxq 112(%rsi), %rax, %rcx
  10152. movq 56(%rdi), %r11
  10153. adcxq %rax, %r10
  10154. adoxq %rcx, %r11
  10155. movq %r10, 48(%rdi)
  10156. # a[i+15] += m[15] * mu
  10157. mulxq 120(%rsi), %rax, %rcx
  10158. movq 64(%rdi), %r10
  10159. adcxq %rax, %r11
  10160. adoxq %rcx, %r10
  10161. movq %r11, 56(%rdi)
  10162. adcxq %r14, %r10
  10163. movq %r10, 64(%rdi)
  10164. movq %r12, %r14
  10165. adoxq %r12, %r14
  10166. adcxq %r12, %r14
  10167. # mu = a[i] * mp
  10168. movq %r13, %rdx
  10169. movq %r13, %r10
  10170. imulq %r8, %rdx
  10171. xorq %r12, %r12
  10172. # a[i+0] += m[0] * mu
  10173. mulxq (%rsi), %rax, %rcx
  10174. movq -48(%rdi), %r13
  10175. adcxq %rax, %r10
  10176. adoxq %rcx, %r13
  10177. # a[i+1] += m[1] * mu
  10178. mulxq 8(%rsi), %rax, %rcx
  10179. movq -40(%rdi), %r10
  10180. adcxq %rax, %r13
  10181. adoxq %rcx, %r10
  10182. # a[i+2] += m[2] * mu
  10183. mulxq 16(%rsi), %rax, %rcx
  10184. movq -32(%rdi), %r11
  10185. adcxq %rax, %r10
  10186. adoxq %rcx, %r11
  10187. movq %r10, -40(%rdi)
  10188. # a[i+3] += m[3] * mu
  10189. mulxq 24(%rsi), %rax, %rcx
  10190. movq -24(%rdi), %r10
  10191. adcxq %rax, %r11
  10192. adoxq %rcx, %r10
  10193. movq %r11, -32(%rdi)
  10194. # a[i+4] += m[4] * mu
  10195. mulxq 32(%rsi), %rax, %rcx
  10196. movq -16(%rdi), %r11
  10197. adcxq %rax, %r10
  10198. adoxq %rcx, %r11
  10199. movq %r10, -24(%rdi)
  10200. # a[i+5] += m[5] * mu
  10201. mulxq 40(%rsi), %rax, %rcx
  10202. movq -8(%rdi), %r10
  10203. adcxq %rax, %r11
  10204. adoxq %rcx, %r10
  10205. movq %r11, -16(%rdi)
  10206. # a[i+6] += m[6] * mu
  10207. mulxq 48(%rsi), %rax, %rcx
  10208. movq (%rdi), %r11
  10209. adcxq %rax, %r10
  10210. adoxq %rcx, %r11
  10211. movq %r10, -8(%rdi)
  10212. # a[i+7] += m[7] * mu
  10213. mulxq 56(%rsi), %rax, %rcx
  10214. movq 8(%rdi), %r10
  10215. adcxq %rax, %r11
  10216. adoxq %rcx, %r10
  10217. movq %r11, (%rdi)
  10218. # a[i+8] += m[8] * mu
  10219. mulxq 64(%rsi), %rax, %rcx
  10220. movq 16(%rdi), %r11
  10221. adcxq %rax, %r10
  10222. adoxq %rcx, %r11
  10223. movq %r10, 8(%rdi)
  10224. # a[i+9] += m[9] * mu
  10225. mulxq 72(%rsi), %rax, %rcx
  10226. movq 24(%rdi), %r10
  10227. adcxq %rax, %r11
  10228. adoxq %rcx, %r10
  10229. movq %r11, 16(%rdi)
  10230. # a[i+10] += m[10] * mu
  10231. mulxq 80(%rsi), %rax, %rcx
  10232. movq 32(%rdi), %r11
  10233. adcxq %rax, %r10
  10234. adoxq %rcx, %r11
  10235. movq %r10, 24(%rdi)
  10236. # a[i+11] += m[11] * mu
  10237. mulxq 88(%rsi), %rax, %rcx
  10238. movq 40(%rdi), %r10
  10239. adcxq %rax, %r11
  10240. adoxq %rcx, %r10
  10241. movq %r11, 32(%rdi)
  10242. # a[i+12] += m[12] * mu
  10243. mulxq 96(%rsi), %rax, %rcx
  10244. movq 48(%rdi), %r11
  10245. adcxq %rax, %r10
  10246. adoxq %rcx, %r11
  10247. movq %r10, 40(%rdi)
  10248. # a[i+13] += m[13] * mu
  10249. mulxq 104(%rsi), %rax, %rcx
  10250. movq 56(%rdi), %r10
  10251. adcxq %rax, %r11
  10252. adoxq %rcx, %r10
  10253. movq %r11, 48(%rdi)
  10254. # a[i+14] += m[14] * mu
  10255. mulxq 112(%rsi), %rax, %rcx
  10256. movq 64(%rdi), %r11
  10257. adcxq %rax, %r10
  10258. adoxq %rcx, %r11
  10259. movq %r10, 56(%rdi)
  10260. # a[i+15] += m[15] * mu
  10261. mulxq 120(%rsi), %rax, %rcx
  10262. movq 72(%rdi), %r10
  10263. adcxq %rax, %r11
  10264. adoxq %rcx, %r10
  10265. movq %r11, 64(%rdi)
  10266. adcxq %r14, %r10
  10267. movq %r10, 72(%rdi)
  10268. movq %r12, %r14
  10269. adoxq %r12, %r14
  10270. adcxq %r12, %r14
  10271. # a += 2
  10272. addq $16, %rdi
  10273. # i -= 2
  10274. subq $2, %r9
  10275. jnz L_mont_loop_avx2_16
  10276. subq $0x40, %rdi
  10277. negq %r14
  10278. movq %rdi, %r8
  10279. subq $0x80, %rdi
  10280. movq (%rsi), %rcx
  10281. movq %r13, %rdx
  10282. pextq %r14, %rcx, %rcx
  10283. subq %rcx, %rdx
  10284. movq 8(%rsi), %rcx
  10285. movq 8(%r8), %rax
  10286. pextq %r14, %rcx, %rcx
  10287. movq %rdx, (%rdi)
  10288. sbbq %rcx, %rax
  10289. movq 16(%rsi), %rdx
  10290. movq 16(%r8), %rcx
  10291. pextq %r14, %rdx, %rdx
  10292. movq %rax, 8(%rdi)
  10293. sbbq %rdx, %rcx
  10294. movq 24(%rsi), %rax
  10295. movq 24(%r8), %rdx
  10296. pextq %r14, %rax, %rax
  10297. movq %rcx, 16(%rdi)
  10298. sbbq %rax, %rdx
  10299. movq 32(%rsi), %rcx
  10300. movq 32(%r8), %rax
  10301. pextq %r14, %rcx, %rcx
  10302. movq %rdx, 24(%rdi)
  10303. sbbq %rcx, %rax
  10304. movq 40(%rsi), %rdx
  10305. movq 40(%r8), %rcx
  10306. pextq %r14, %rdx, %rdx
  10307. movq %rax, 32(%rdi)
  10308. sbbq %rdx, %rcx
  10309. movq 48(%rsi), %rax
  10310. movq 48(%r8), %rdx
  10311. pextq %r14, %rax, %rax
  10312. movq %rcx, 40(%rdi)
  10313. sbbq %rax, %rdx
  10314. movq 56(%rsi), %rcx
  10315. movq 56(%r8), %rax
  10316. pextq %r14, %rcx, %rcx
  10317. movq %rdx, 48(%rdi)
  10318. sbbq %rcx, %rax
  10319. movq 64(%rsi), %rdx
  10320. movq 64(%r8), %rcx
  10321. pextq %r14, %rdx, %rdx
  10322. movq %rax, 56(%rdi)
  10323. sbbq %rdx, %rcx
  10324. movq 72(%rsi), %rax
  10325. movq 72(%r8), %rdx
  10326. pextq %r14, %rax, %rax
  10327. movq %rcx, 64(%rdi)
  10328. sbbq %rax, %rdx
  10329. movq 80(%rsi), %rcx
  10330. movq 80(%r8), %rax
  10331. pextq %r14, %rcx, %rcx
  10332. movq %rdx, 72(%rdi)
  10333. sbbq %rcx, %rax
  10334. movq 88(%rsi), %rdx
  10335. movq 88(%r8), %rcx
  10336. pextq %r14, %rdx, %rdx
  10337. movq %rax, 80(%rdi)
  10338. sbbq %rdx, %rcx
  10339. movq 96(%rsi), %rax
  10340. movq 96(%r8), %rdx
  10341. pextq %r14, %rax, %rax
  10342. movq %rcx, 88(%rdi)
  10343. sbbq %rax, %rdx
  10344. movq 104(%rsi), %rcx
  10345. movq 104(%r8), %rax
  10346. pextq %r14, %rcx, %rcx
  10347. movq %rdx, 96(%rdi)
  10348. sbbq %rcx, %rax
  10349. movq 112(%rsi), %rdx
  10350. movq 112(%r8), %rcx
  10351. pextq %r14, %rdx, %rdx
  10352. movq %rax, 104(%rdi)
  10353. sbbq %rdx, %rcx
  10354. movq 120(%rsi), %rax
  10355. movq 120(%r8), %rdx
  10356. pextq %r14, %rax, %rax
  10357. movq %rcx, 112(%rdi)
  10358. sbbq %rax, %rdx
  10359. movq %rdx, 120(%rdi)
  10360. popq %r14
  10361. popq %r13
  10362. popq %r12
  10363. repz retq
  10364. #ifndef __APPLE__
  10365. .size sp_2048_mont_reduce_avx2_16,.-sp_2048_mont_reduce_avx2_16
  10366. #endif /* __APPLE__ */
  10367. #endif /* HAVE_INTEL_AVX2 */
  /* Conditionally subtract b from a using the mask m: r = a - (b & m).
   *
   * Each limb of b is ANDed with m before it is subtracted, so m = -1
   * subtracts b and m = 0 subtracts zero (r becomes a copy of a).  The
   * routine is branch-free; both mask values run the same instructions.
   *
   * r  (%rdi)  Result of the conditional subtract, 32 limbs.
   * a  (%rsi)  Number to subtract from, 32 limbs.
   * b  (%rdx)  Number to subtract, 32 limbs.
   * m  (%rcx)  Mask value to apply to every limb of b.
   *
   * Returns %rax = 0 when no borrow left the top limb, -1 when one did.
   * Scratch: %r8, %r9, %rdx, flags and a 256-byte stack buffer.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_cond_sub_32
  .type sp_2048_cond_sub_32,@function
  .align 16
  sp_2048_cond_sub_32:
  #else
  .section __TEXT,__text
  .globl _sp_2048_cond_sub_32
  .p2align 4
  _sp_2048_cond_sub_32:
  #endif /* __APPLE__ */
          # Reserve 32 limbs of stack for the masked copy of b.
          subq    $0x100, %rsp
          # Return value starts at 0; the final sbbq turns a borrow into -1.
          movq    $0x00, %rax

          # Stage 1: stack[k] = b[k] & m, two limbs per step.
          # b[0..1]
          movq    (%rdx), %r8
          movq    8(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, (%rsp)
          movq    %r9, 8(%rsp)
          # b[2..3]
          movq    16(%rdx), %r8
          movq    24(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 16(%rsp)
          movq    %r9, 24(%rsp)
          # b[4..5]
          movq    32(%rdx), %r8
          movq    40(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 32(%rsp)
          movq    %r9, 40(%rsp)
          # b[6..7]
          movq    48(%rdx), %r8
          movq    56(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 48(%rsp)
          movq    %r9, 56(%rsp)
          # b[8..9]
          movq    64(%rdx), %r8
          movq    72(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 64(%rsp)
          movq    %r9, 72(%rsp)
          # b[10..11]
          movq    80(%rdx), %r8
          movq    88(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 80(%rsp)
          movq    %r9, 88(%rsp)
          # b[12..13]
          movq    96(%rdx), %r8
          movq    104(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 96(%rsp)
          movq    %r9, 104(%rsp)
          # b[14..15]
          movq    112(%rdx), %r8
          movq    120(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 112(%rsp)
          movq    %r9, 120(%rsp)
          # b[16..17]
          movq    128(%rdx), %r8
          movq    136(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 128(%rsp)
          movq    %r9, 136(%rsp)
          # b[18..19]
          movq    144(%rdx), %r8
          movq    152(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 144(%rsp)
          movq    %r9, 152(%rsp)
          # b[20..21]
          movq    160(%rdx), %r8
          movq    168(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 160(%rsp)
          movq    %r9, 168(%rsp)
          # b[22..23]
          movq    176(%rdx), %r8
          movq    184(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 176(%rsp)
          movq    %r9, 184(%rsp)
          # b[24..25]
          movq    192(%rdx), %r8
          movq    200(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 192(%rsp)
          movq    %r9, 200(%rsp)
          # b[26..27]
          movq    208(%rdx), %r8
          movq    216(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 208(%rsp)
          movq    %r9, 216(%rsp)
          # b[28..29]
          movq    224(%rdx), %r8
          movq    232(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 224(%rsp)
          movq    %r9, 232(%rsp)
          # b[30..31]
          movq    240(%rdx), %r8
          movq    248(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 240(%rsp)
          movq    %r9, 248(%rsp)

          # Stage 2: r[k] = a[k] - stack[k] - borrow.
          # The borrow travels in CF from limb to limb; only movq (which
          # leaves flags alone) sits between the subq/sbbq instructions.
          # %rdx is reused as the masked-limb temporary from here on.
          # %r8 and %r9 alternate so that limb k-1 is stored after limb k
          # has been computed.
          # limb 0
          movq    (%rsi), %r8
          movq    (%rsp), %rdx
          subq    %rdx, %r8
          # limb 1
          movq    8(%rsi), %r9
          movq    8(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, (%rdi)
          # limb 2
          movq    16(%rsi), %r8
          movq    16(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 8(%rdi)
          # limb 3
          movq    24(%rsi), %r9
          movq    24(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 16(%rdi)
          # limb 4
          movq    32(%rsi), %r8
          movq    32(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 24(%rdi)
          # limb 5
          movq    40(%rsi), %r9
          movq    40(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 32(%rdi)
          # limb 6
          movq    48(%rsi), %r8
          movq    48(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 40(%rdi)
          # limb 7
          movq    56(%rsi), %r9
          movq    56(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 48(%rdi)
          # limb 8
          movq    64(%rsi), %r8
          movq    64(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 56(%rdi)
          # limb 9
          movq    72(%rsi), %r9
          movq    72(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 64(%rdi)
          # limb 10
          movq    80(%rsi), %r8
          movq    80(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 72(%rdi)
          # limb 11
          movq    88(%rsi), %r9
          movq    88(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 80(%rdi)
          # limb 12
          movq    96(%rsi), %r8
          movq    96(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 88(%rdi)
          # limb 13
          movq    104(%rsi), %r9
          movq    104(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 96(%rdi)
          # limb 14
          movq    112(%rsi), %r8
          movq    112(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 104(%rdi)
          # limb 15
          movq    120(%rsi), %r9
          movq    120(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 112(%rdi)
          # limb 16
          movq    128(%rsi), %r8
          movq    128(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 120(%rdi)
          # limb 17
          movq    136(%rsi), %r9
          movq    136(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 128(%rdi)
          # limb 18
          movq    144(%rsi), %r8
          movq    144(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 136(%rdi)
          # limb 19
          movq    152(%rsi), %r9
          movq    152(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 144(%rdi)
          # limb 20
          movq    160(%rsi), %r8
          movq    160(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 152(%rdi)
          # limb 21
          movq    168(%rsi), %r9
          movq    168(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 160(%rdi)
          # limb 22
          movq    176(%rsi), %r8
          movq    176(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 168(%rdi)
          # limb 23
          movq    184(%rsi), %r9
          movq    184(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 176(%rdi)
          # limb 24
          movq    192(%rsi), %r8
          movq    192(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 184(%rdi)
          # limb 25
          movq    200(%rsi), %r9
          movq    200(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 192(%rdi)
          # limb 26
          movq    208(%rsi), %r8
          movq    208(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 200(%rdi)
          # limb 27
          movq    216(%rsi), %r9
          movq    216(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 208(%rdi)
          # limb 28
          movq    224(%rsi), %r8
          movq    224(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 216(%rdi)
          # limb 29
          movq    232(%rsi), %r9
          movq    232(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 224(%rdi)
          # limb 30
          movq    240(%rsi), %r8
          movq    240(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, 232(%rdi)
          # limb 31
          movq    248(%rsi), %r9
          movq    248(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, 240(%rdi)
          movq    %r9, 248(%rdi)

          # Fold the final borrow into the return value: 0 - CF.
          sbbq    $0x00, %rax
          # Release the scratch buffer.
          addq    $0x100, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_2048_cond_sub_32,.-sp_2048_cond_sub_32
  #endif /* __APPLE__ */
  /* Reduce the number back to 2048 bits using Montgomery reduction.
   *
   * a   (%rdi)  Number to reduce in place.  Limbs a[0..63] are accessed;
   *             the 32-limb result is written to a[0..31].
   * m   (%rsi)  The modulus, 32 limbs.
   * mp  (%rdx)  The digit representing the negative inverse of m mod 2^n.
   *
   * Register roles inside the loop:
   *   %rcx  mp                      %r8   remaining iterations (32 down to 0)
   *   %r11  mu for this iteration   %r9/%r10  alternating limb carries
   *   %r13  a[i]   (kept in a register, not reloaded from memory)
   *   %r14  a[i+1] (kept in a register, not reloaded from memory)
   *   %r12  scratch for a[i+3..i+31]
   *   %r15  carry out of the top limb, carried into the next iteration
   *
   * After the loop %r15 is negated into a 0/-1 mask and
   * sp_2048_cond_sub_32(a, a + 32, m, mask) produces the final result.
   *
   * Stack: four callee-saved pushes leave %rsp at 8 mod 16, so 8 bytes of
   * padding are added around the call to keep the call site 16-byte aligned
   * as the System V AMD64 ABI requires (the call goes through the PLT).
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_mont_reduce_32
  .type sp_2048_mont_reduce_32,@function
  .align 16
  sp_2048_mont_reduce_32:
  #else
  .section __TEXT,__text
  .globl _sp_2048_mont_reduce_32
  .p2align 4
  _sp_2048_mont_reduce_32:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          # mulq overwrites %rdx, so mp is moved to %rcx.
          movq    %rdx, %rcx
          xorq    %r15, %r15
          # i = 32
          movq    $32, %r8
          movq    (%rdi), %r13
          movq    8(%rdi), %r14
  L_mont_loop_32:
          # mu = a[i] * mp
          movq    %r13, %r11
          imulq   %rcx, %r11
          # a[i+0] += m[0] * mu
          # Only the carry in %r10 is kept; the sum in %r13 is replaced below.
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    (%rsi)
          addq    %rax, %r13
          adcq    %rdx, %r10
          # a[i+1] += m[1] * mu
          # The result stays in %r13 and becomes a[i] of the next iteration.
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    8(%rsi)
          movq    %r14, %r13
          addq    %rax, %r13
          adcq    %rdx, %r9
          addq    %r10, %r13
          adcq    $0x00, %r9
          # a[i+2] += m[2] * mu
          # The result stays in %r14 and becomes a[i+1] of the next iteration.
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    16(%rsi)
          movq    16(%rdi), %r14
          addq    %rax, %r14
          adcq    %rdx, %r10
          addq    %r9, %r14
          adcq    $0x00, %r10
          # a[i+3] += m[3] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    24(%rsi)
          movq    24(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 24(%rdi)
          adcq    $0x00, %r9
          # a[i+4] += m[4] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    32(%rsi)
          movq    32(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 32(%rdi)
          adcq    $0x00, %r10
          # a[i+5] += m[5] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    40(%rsi)
          movq    40(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 40(%rdi)
          adcq    $0x00, %r9
          # a[i+6] += m[6] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    48(%rsi)
          movq    48(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 48(%rdi)
          adcq    $0x00, %r10
          # a[i+7] += m[7] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    56(%rsi)
          movq    56(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 56(%rdi)
          adcq    $0x00, %r9
          # a[i+8] += m[8] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    64(%rsi)
          movq    64(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 64(%rdi)
          adcq    $0x00, %r10
          # a[i+9] += m[9] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    72(%rsi)
          movq    72(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 72(%rdi)
          adcq    $0x00, %r9
          # a[i+10] += m[10] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    80(%rsi)
          movq    80(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 80(%rdi)
          adcq    $0x00, %r10
          # a[i+11] += m[11] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    88(%rsi)
          movq    88(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 88(%rdi)
          adcq    $0x00, %r9
          # a[i+12] += m[12] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    96(%rsi)
          movq    96(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 96(%rdi)
          adcq    $0x00, %r10
          # a[i+13] += m[13] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    104(%rsi)
          movq    104(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 104(%rdi)
          adcq    $0x00, %r9
          # a[i+14] += m[14] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    112(%rsi)
          movq    112(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 112(%rdi)
          adcq    $0x00, %r10
          # a[i+15] += m[15] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    120(%rsi)
          movq    120(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 120(%rdi)
          adcq    $0x00, %r9
          # a[i+16] += m[16] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    128(%rsi)
          movq    128(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 128(%rdi)
          adcq    $0x00, %r10
          # a[i+17] += m[17] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    136(%rsi)
          movq    136(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 136(%rdi)
          adcq    $0x00, %r9
          # a[i+18] += m[18] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    144(%rsi)
          movq    144(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 144(%rdi)
          adcq    $0x00, %r10
          # a[i+19] += m[19] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    152(%rsi)
          movq    152(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 152(%rdi)
          adcq    $0x00, %r9
          # a[i+20] += m[20] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    160(%rsi)
          movq    160(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 160(%rdi)
          adcq    $0x00, %r10
          # a[i+21] += m[21] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    168(%rsi)
          movq    168(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 168(%rdi)
          adcq    $0x00, %r9
          # a[i+22] += m[22] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    176(%rsi)
          movq    176(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 176(%rdi)
          adcq    $0x00, %r10
          # a[i+23] += m[23] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    184(%rsi)
          movq    184(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 184(%rdi)
          adcq    $0x00, %r9
          # a[i+24] += m[24] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    192(%rsi)
          movq    192(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 192(%rdi)
          adcq    $0x00, %r10
          # a[i+25] += m[25] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    200(%rsi)
          movq    200(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 200(%rdi)
          adcq    $0x00, %r9
          # a[i+26] += m[26] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    208(%rsi)
          movq    208(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 208(%rdi)
          adcq    $0x00, %r10
          # a[i+27] += m[27] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    216(%rsi)
          movq    216(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 216(%rdi)
          adcq    $0x00, %r9
          # a[i+28] += m[28] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    224(%rsi)
          movq    224(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 224(%rdi)
          adcq    $0x00, %r10
          # a[i+29] += m[29] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    232(%rsi)
          movq    232(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 232(%rdi)
          adcq    $0x00, %r9
          # a[i+30] += m[30] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    240(%rsi)
          movq    240(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 240(%rdi)
          adcq    $0x00, %r10
          # a[i+31] += m[31] * mu
          # The high product word also absorbs the carry saved in %r15 and is
          # then added into a[i+32]; %r15 collects the carries out of both.
          # movq $0x00 is used to clear %r15 because it leaves CF untouched.
          movq    %r11, %rax
          mulq    248(%rsi)
          movq    248(%rdi), %r12
          addq    %rax, %r10
          adcq    %r15, %rdx
          movq    $0x00, %r15
          adcq    $0x00, %r15
          addq    %r10, %r12
          movq    %r12, 248(%rdi)
          adcq    %rdx, 256(%rdi)
          adcq    $0x00, %r15
          # i -= 1
          addq    $8, %rdi
          decq    %r8
          jnz     L_mont_loop_32
          # Write back the two limbs that were kept in registers.
          movq    %r13, (%rdi)
          movq    %r14, 8(%rdi)
          # Carry 0/1 becomes mask 0/-1.
          negq    %r15
          # Arguments for sp_2048_cond_sub_32(r, a, b, m):
          #   m = mask, b = modulus, a = upper 32 limbs, r = start of buffer.
          movq    %r15, %rcx
          movq    %rsi, %rdx
          movq    %rdi, %rsi
          subq    $0x100, %rdi
          # Pad the stack so %rsp is a multiple of 16 at the call.
          subq    $8, %rsp
  #ifndef __APPLE__
          callq   sp_2048_cond_sub_32@plt
  #else
          callq   _sp_2048_cond_sub_32
  #endif /* __APPLE__ */
          addq    $8, %rsp
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size sp_2048_mont_reduce_32,.-sp_2048_mont_reduce_32
  #endif /* __APPLE__ */
  /* Conditionally subtract b from a using the mask m (BMI2 variant).
   *
   * pextq is used as the masking step: with m = -1 it returns the limb of b
   * unchanged and with m = 0 it returns 0, so for those two mask values the
   * result is r = a - b or r = a.  No stack scratch space is used.
   *
   * r  (%rdi)  Result of the conditional subtract, 32 limbs.
   * a  (%rsi)  Number to subtract from, 32 limbs.
   * b  (%rdx)  Number to subtract, 32 limbs.
   * m  (%rcx)  Mask value: -1 to subtract, 0 to leave a unchanged.
   *
   * Returns %rax = 0 when no borrow left the top limb, -1 when one did.
   * Scratch: %r8, %r9, %r10 and flags.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_cond_sub_avx2_32
  .type sp_2048_cond_sub_avx2_32,@function
  .align 16
  sp_2048_cond_sub_avx2_32:
  #else
  .section __TEXT,__text
  .globl _sp_2048_cond_sub_avx2_32
  .p2align 4
  _sp_2048_cond_sub_avx2_32:
  #endif /* __APPLE__ */
          # Return value starts at 0; the final sbbq turns a borrow into -1.
          movq    $0x00, %rax

          # Per limb: load b[k] and a[k], mask b[k], store the previous
          # result, then subtract with borrow.  %r8/%r9/%r10 rotate roles.
          # The borrow lives in CF; movq and pextq sit between the
          # subq/sbbq instructions and the chain relies on them not
          # changing CF.
          # limb 0
          movq    (%rdx), %r10
          movq    (%rsi), %r8
          pextq   %rcx, %r10, %r10
          subq    %r10, %r8
          # limb 1
          movq    8(%rdx), %r10
          movq    8(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, (%rdi)
          sbbq    %r10, %r9
          # limb 2
          movq    16(%rdx), %r8
          movq    16(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 8(%rdi)
          sbbq    %r8, %r10
          # limb 3
          movq    24(%rdx), %r9
          movq    24(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 16(%rdi)
          sbbq    %r9, %r8
          # limb 4
          movq    32(%rdx), %r10
          movq    32(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 24(%rdi)
          sbbq    %r10, %r9
          # limb 5
          movq    40(%rdx), %r8
          movq    40(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 32(%rdi)
          sbbq    %r8, %r10
          # limb 6
          movq    48(%rdx), %r9
          movq    48(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 40(%rdi)
          sbbq    %r9, %r8
          # limb 7
          movq    56(%rdx), %r10
          movq    56(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 48(%rdi)
          sbbq    %r10, %r9
          # limb 8
          movq    64(%rdx), %r8
          movq    64(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 56(%rdi)
          sbbq    %r8, %r10
          # limb 9
          movq    72(%rdx), %r9
          movq    72(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 64(%rdi)
          sbbq    %r9, %r8
          # limb 10
          movq    80(%rdx), %r10
          movq    80(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 72(%rdi)
          sbbq    %r10, %r9
          # limb 11
          movq    88(%rdx), %r8
          movq    88(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 80(%rdi)
          sbbq    %r8, %r10
          # limb 12
          movq    96(%rdx), %r9
          movq    96(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 88(%rdi)
          sbbq    %r9, %r8
          # limb 13
          movq    104(%rdx), %r10
          movq    104(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 96(%rdi)
          sbbq    %r10, %r9
          # limb 14
          movq    112(%rdx), %r8
          movq    112(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 104(%rdi)
          sbbq    %r8, %r10
          # limb 15
          movq    120(%rdx), %r9
          movq    120(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 112(%rdi)
          sbbq    %r9, %r8
          # limb 16
          movq    128(%rdx), %r10
          movq    128(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 120(%rdi)
          sbbq    %r10, %r9
          # limb 17
          movq    136(%rdx), %r8
          movq    136(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 128(%rdi)
          sbbq    %r8, %r10
          # limb 18
          movq    144(%rdx), %r9
          movq    144(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 136(%rdi)
          sbbq    %r9, %r8
          # limb 19
          movq    152(%rdx), %r10
          movq    152(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 144(%rdi)
          sbbq    %r10, %r9
          # limb 20
          movq    160(%rdx), %r8
          movq    160(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 152(%rdi)
          sbbq    %r8, %r10
          # limb 21
          movq    168(%rdx), %r9
          movq    168(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 160(%rdi)
          sbbq    %r9, %r8
          # limb 22
          movq    176(%rdx), %r10
          movq    176(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 168(%rdi)
          sbbq    %r10, %r9
          # limb 23
          movq    184(%rdx), %r8
          movq    184(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 176(%rdi)
          sbbq    %r8, %r10
          # limb 24
          movq    192(%rdx), %r9
          movq    192(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 184(%rdi)
          sbbq    %r9, %r8
          # limb 25
          movq    200(%rdx), %r10
          movq    200(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 192(%rdi)
          sbbq    %r10, %r9
          # limb 26
          movq    208(%rdx), %r8
          movq    208(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 200(%rdi)
          sbbq    %r8, %r10
          # limb 27
          movq    216(%rdx), %r9
          movq    216(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 208(%rdi)
          sbbq    %r9, %r8
          # limb 28
          movq    224(%rdx), %r10
          movq    224(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 216(%rdi)
          sbbq    %r10, %r9
          # limb 29
          movq    232(%rdx), %r8
          movq    232(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 224(%rdi)
          sbbq    %r8, %r10
          # limb 30
          movq    240(%rdx), %r9
          movq    240(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 232(%rdi)
          sbbq    %r9, %r8
          # limb 31
          movq    248(%rdx), %r10
          movq    248(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 240(%rdi)
          sbbq    %r10, %r9
          movq    %r9, 248(%rdi)

          # Fold the final borrow into the return value: 0 - CF.
          sbbq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_2048_cond_sub_avx2_32,.-sp_2048_cond_sub_avx2_32
  #endif /* __APPLE__ */
  #ifdef HAVE_INTEL_AVX2
  /* Mul a by digit b into r. (r = a * b)
   *
   * r  (%rdi)  Result; 33 limbs are written (r[0..32]).
   * a  (%rsi)  Multiplicand, 32 limbs.
   * b  (%rdx)  Single digit multiplier.
   *
   * mulxq multiplies by %rdx without touching flags, which lets two carry
   * chains run side by side: adcxq (CF) adds each low product word into the
   * limb being finished, adoxq (OF) adds each high product word into the
   * next limb.  %r9 and %r10 alternate as "current" and "next" limb, and
   * %r11 stays zero for the whole routine.
   *
   * Scratch: %rax, %rcx, %rdx, %r8, %r9, %r10, %r11 and flags.
   */
  #ifndef __APPLE__
  .text
  .globl sp_2048_mul_d_avx2_32
  .type sp_2048_mul_d_avx2_32,@function
  .align 16
  sp_2048_mul_d_avx2_32:
  #else
  .section __TEXT,__text
  .globl _sp_2048_mul_d_avx2_32
  .p2align 4
  _sp_2048_mul_d_avx2_32:
  #endif /* __APPLE__ */
          movq    %rdx, %rax
          # A[0] * B
          # xorq zeroes %r11 and clears both CF and OF for the two chains.
          movq    %rax, %rdx
          xorq    %r11, %r11
          mulxq   (%rsi), %r9, %r10
          movq    %r9, (%rdi)
          # A[1] * B
          mulxq   8(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 8(%rdi)
          adoxq   %r8, %r9
          # A[2] * B
          mulxq   16(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 16(%rdi)
          adoxq   %r8, %r10
          # A[3] * B
          mulxq   24(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 24(%rdi)
          adoxq   %r8, %r9
          # A[4] * B
          mulxq   32(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 32(%rdi)
          adoxq   %r8, %r10
          # A[5] * B
          mulxq   40(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 40(%rdi)
          adoxq   %r8, %r9
          # A[6] * B
          mulxq   48(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 48(%rdi)
          adoxq   %r8, %r10
          # A[7] * B
          mulxq   56(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 56(%rdi)
          adoxq   %r8, %r9
          # A[8] * B
          mulxq   64(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 64(%rdi)
          adoxq   %r8, %r10
          # A[9] * B
          mulxq   72(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 72(%rdi)
          adoxq   %r8, %r9
          # A[10] * B
          mulxq   80(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 80(%rdi)
          adoxq   %r8, %r10
          # A[11] * B
          mulxq   88(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 88(%rdi)
          adoxq   %r8, %r9
          # A[12] * B
          mulxq   96(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 96(%rdi)
          adoxq   %r8, %r10
          # A[13] * B
          mulxq   104(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 104(%rdi)
          adoxq   %r8, %r9
          # A[14] * B
          mulxq   112(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 112(%rdi)
          adoxq   %r8, %r10
          # A[15] * B
          mulxq   120(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 120(%rdi)
          adoxq   %r8, %r9
          # A[16] * B
          mulxq   128(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 128(%rdi)
          adoxq   %r8, %r10
          # A[17] * B
          mulxq   136(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 136(%rdi)
          adoxq   %r8, %r9
          # A[18] * B
          mulxq   144(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 144(%rdi)
          adoxq   %r8, %r10
          # A[19] * B
          mulxq   152(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 152(%rdi)
          adoxq   %r8, %r9
          # A[20] * B
          mulxq   160(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 160(%rdi)
          adoxq   %r8, %r10
          # A[21] * B
          mulxq   168(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 168(%rdi)
          adoxq   %r8, %r9
          # A[22] * B
          mulxq   176(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 176(%rdi)
          adoxq   %r8, %r10
          # A[23] * B
          mulxq   184(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 184(%rdi)
          adoxq   %r8, %r9
          # A[24] * B
          mulxq   192(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 192(%rdi)
          adoxq   %r8, %r10
          # A[25] * B
          mulxq   200(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 200(%rdi)
          adoxq   %r8, %r9
          # A[26] * B
          mulxq   208(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 208(%rdi)
          adoxq   %r8, %r10
          # A[27] * B
          mulxq   216(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 216(%rdi)
          adoxq   %r8, %r9
          # A[28] * B
          mulxq   224(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 224(%rdi)
          adoxq   %r8, %r10
          # A[29] * B
          mulxq   232(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          movq    %r10, 232(%rdi)
          adoxq   %r8, %r9
          # A[30] * B
          mulxq   240(%rsi), %rcx, %r8
          movq    %r11, %r10
          adcxq   %rcx, %r9
          movq    %r9, 240(%rdi)
          adoxq   %r8, %r10
          # A[31] * B
          # Both chains are closed here: the last high word goes in via OF,
          # then the outstanding CF is added with a zero operand.
          mulxq   248(%rsi), %rcx, %r8
          movq    %r11, %r9
          adcxq   %rcx, %r10
          adoxq   %r8, %r9
          adcxq   %r11, %r9
          movq    %r10, 248(%rdi)
          movq    %r9, 256(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_2048_mul_d_avx2_32,.-sp_2048_mul_d_avx2_32
  #endif /* __APPLE__ */
  #endif /* HAVE_INTEL_AVX2 */
  /* Compare a with b in constant time.
   *
   * Both operands are 32 x 64-bit words.  Words are examined from the most
   * significant (byte offset 248) down to the least significant (offset 0).
   * The routine contains no branches: every word of both operands is always
   * loaded and the outcome is selected with conditional moves only.
   *
   * a  (%rdi) First number.
   * b  (%rsi) Second number.
   * return (%rax) -1 when a < b, 0 when a == b, +1 when a > b.  Words are
   *        compared as unsigned values (cmova / cmovc).
   *
   * Register roles:
   *   %rcx       constant 0
   *   %r8        constant 1
   *   %rdx       "still equal" mask: all ones until the first differing word
   *              has been seen, zero afterwards.  Both words are ANDed with
   *              it, so once a difference is found all later words compare
   *              as 0 == 0 and cannot change the result.
   *   %rax       running result
   *   %r9, %r10  current word of a and of b
   *
   * The .irp block expands to one compare step per listed offset, i.e. to the
   * fully unrolled 32-step sequence.
   */
#ifndef __APPLE__
.text
.globl sp_2048_cmp_32
.type sp_2048_cmp_32,@function
.align 16
sp_2048_cmp_32:
#else
.section __TEXT,__text
.globl _sp_2048_cmp_32
.p2align 4
_sp_2048_cmp_32:
#endif /* __APPLE__ */
        xorq    %rcx, %rcx
        movq    $-1, %rdx
        movq    $-1, %rax
        movq    $0x01, %r8
.irp off,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,0
        movq    \off(%rdi), %r9
        movq    \off(%rsi), %r10
        andq    %rdx, %r9
        andq    %rdx, %r10
        /* flags of (a word - b word) drive the three conditional moves */
        subq    %r10, %r9
        /* a word > b word: result = 1 */
        cmova   %r8, %rax
        /* a word < b word: result = mask (-1; a borrow needs mask != 0) */
        cmovc   %rdx, %rax
        /* words differ: clear the mask so lower words are ignored */
        cmovnz  %rcx, %rdx
.endr
        /* all equal: rax = rdx = -1 gives 0; otherwise rdx = 0 keeps rax */
        xorq    %rdx, %rax
        repz retq
#ifndef __APPLE__
.size sp_2048_cmp_32,.-sp_2048_cmp_32
#endif /* __APPLE__ */
  /* Subtract b from a and store the difference in r.  (r = a - b)
   *
   * All three operands are 32 x 64-bit words, least significant word first.
   *
   * r  (%rdi) Result.
   * a  (%rsi) Minuend.
   * b  (%rdx) Subtrahend.
   * return (%rax) 0 when there was no borrow out of the top word, all ones
   *        (-1) when there was (computed as 0 - CF).
   *
   * %rcx and %r8 alternate as the working word.  Each step loads the next
   * word of a before the previous result is stored; movq does not touch the
   * flags, so the sub/sbb borrow chain runs unbroken through all 32 words.
   * The .irp block handles words 1..30 two at a time and expands to the
   * fully unrolled sequence.
   */
#ifndef __APPLE__
.text
.globl sp_2048_sub_32
.type sp_2048_sub_32,@function
.align 16
sp_2048_sub_32:
#else
.section __TEXT,__text
.globl _sp_2048_sub_32
.p2align 4
_sp_2048_sub_32:
#endif /* __APPLE__ */
        /* word 0 starts the borrow chain; rax is zeroed before the first
         * subtract because xorq itself modifies the flags */
        movq    (%rsi), %rcx
        xorq    %rax, %rax
        subq    (%rdx), %rcx
        /* words 1..30: off is the byte offset of the odd word of each pair */
.irp off,8,24,40,56,72,88,104,120,136,152,168,184,200,216,232
        movq    \off(%rsi), %r8
        movq    %rcx, \off-8(%rdi)
        sbbq    \off(%rdx), %r8
        movq    \off+8(%rsi), %rcx
        movq    %r8, \off(%rdi)
        sbbq    \off+8(%rdx), %rcx
.endr
        /* word 31 */
        movq    248(%rsi), %r8
        movq    %rcx, 240(%rdi)
        sbbq    248(%rdx), %r8
        movq    %r8, 248(%rdi)
        /* rax = 0 - borrow */
        sbbq    $0x00, %rax
        repz retq
#ifndef __APPLE__
.size sp_2048_sub_32,.-sp_2048_sub_32
#endif /* __APPLE__ */
  11801. #ifdef HAVE_INTEL_AVX2
  /* Reduce the number back to 2048 bits using Montgomery reduction.
   *
   * Uses mulx together with the two independent carry chains of adcx (CF)
   * and adox (OF), and pext for the final masked subtraction.
   *
   * a   (%rdi) Number to reduce in place.  Words a[0]..a[63] are read; the
   *            reduced 32-word result is written to a[0]..a[31].
   * m   (%rsi) Modulus, 32 x 64-bit words.
   * mp  (%rdx) The digit representing the negative inverse of m mod 2^n.
   *
   * Register roles:
   *   %r8        mp inside the loop; pointer to a[32] in the final subtract
   *   %r9        loop counter, 32 down to 0
   *   %r12       zero (re-zeroed each iteration, which also clears CF and OF)
   *   %r13       current bottom word a[i]; it is carried from one iteration
   *              to the next in this register and is not written back
   *   %r14       carry out of the top word of the previous iteration; after
   *              the loop it is negated into the subtraction mask
   *   %rdx       mu = a[i] * mp, the implicit multiplicand of mulx
   *   %rax/%rcx  low/high half of each m[j] * mu product
   *   %r10/%r11  alternating accumulators for a[i+j]
   *
   * %rdi is biased by 0x80 bytes while the loop runs, so a[i+1]..a[i+32] are
   * addressed with displacements -120..128 (presumably to keep most
   * displacements within a signed byte - not verifiable from this file).
   *
   * The .irp blocks expand to the fully unrolled instruction sequences.
   * %r12-%r14 are callee-saved and are preserved with push/pop.
   */
#ifndef __APPLE__
.text
.globl sp_2048_mont_reduce_avx2_32
.type sp_2048_mont_reduce_avx2_32,@function
.align 16
sp_2048_mont_reduce_avx2_32:
#else
.section __TEXT,__text
.globl _sp_2048_mont_reduce_avx2_32
.p2align 4
_sp_2048_mont_reduce_avx2_32:
#endif /* __APPLE__ */
        pushq   %r12
        pushq   %r13
        pushq   %r14
        movq    %rdx, %r8
        xorq    %r14, %r14
        /* i = 32 */
        movq    $32, %r9
        movq    (%rdi), %r13
        addq    $0x80, %rdi
        xorq    %r12, %r12
L_mont_loop_avx2_32:
        /* mu = a[i] * mp */
        movq    %r13, %rdx
        movq    %r13, %r10
        imulq   %r8, %rdx
        /* r12 = 0; clears CF and OF so both carry chains start empty */
        xorq    %r12, %r12
        /* a[i+0] += m[0] * mu
         * The sum in r10 is never stored (r10 is reloaded below); only its
         * carry continues down the adcx chain. */
        mulxq   (%rsi), %rax, %rcx
        movq    -120(%rdi), %r13
        adcxq   %rax, %r10
        adoxq   %rcx, %r13
        /* a[i+1] += m[1] * mu
         * Completed in r13, which becomes a[i] of the next iteration. */
        mulxq   8(%rsi), %rax, %rcx
        movq    -112(%rdi), %r10
        adcxq   %rax, %r13
        adoxq   %rcx, %r10
        /* a[i+j] += m[j] * mu for j = 2..31, two words per expansion.
         * moff is the byte offset of the even-numbered m[j]; the matching
         * a[i+j] lives at moff-128(%rdi) because of the 0x80 bias. */
.irp moff,16,32,48,64,80,96,112,128,144,160,176,192,208,224,240
        mulxq   \moff(%rsi), %rax, %rcx
        movq    \moff-120(%rdi), %r11
        adcxq   %rax, %r10
        adoxq   %rcx, %r11
        movq    %r10, \moff-128(%rdi)
        mulxq   \moff+8(%rsi), %rax, %rcx
        movq    \moff-112(%rdi), %r10
        adcxq   %rax, %r11
        adoxq   %rcx, %r10
        movq    %r11, \moff-120(%rdi)
.endr
        /* a[i+32] += carry of the previous iteration (+ CF chain) */
        adcxq   %r14, %r10
        movq    %r10, 128(%rdi)
        /* r14 = OF + CF: carry out of this iteration */
        movq    %r12, %r14
        adoxq   %r12, %r14
        adcxq   %r12, %r14
        /* a += 1 */
        addq    $8, %rdi
        /* i -= 1 */
        subq    $0x01, %r9
        jnz     L_mont_loop_avx2_32
        /* rdi = a + 32 words (bias removed); r14 = 0 - carry is the mask */
        subq    $0x80, %rdi
        negq    %r14
        /* r8 = upper half a[32..], rdi = a[0..] (destination) */
        movq    %rdi, %r8
        subq    $0x100, %rdi
        /* a[0..31] = a[32..63] - (m masked by r14).
         * pext with the mask yields the m word when the mask is all ones and
         * 0 when it is zero, and leaves the flags alone, so the sub/sbb
         * borrow chain is unbroken.  Word 0 of the upper half is taken from
         * r13, where the loop left it. */
        movq    (%rsi), %rcx
        movq    %r13, %rdx
        pextq   %r14, %rcx, %rcx
        subq    %rcx, %rdx
        /* words 1..30, three per expansion (the registers rotate with
         * period 3) */
.irp off,8,32,56,80,104,128,152,176,200,224
        movq    \off(%rsi), %rcx
        movq    \off(%r8), %rax
        pextq   %r14, %rcx, %rcx
        movq    %rdx, \off-8(%rdi)
        sbbq    %rcx, %rax
        movq    \off+8(%rsi), %rdx
        movq    \off+8(%r8), %rcx
        pextq   %r14, %rdx, %rdx
        movq    %rax, \off(%rdi)
        sbbq    %rdx, %rcx
        movq    \off+16(%rsi), %rax
        movq    \off+16(%r8), %rdx
        pextq   %r14, %rax, %rax
        movq    %rcx, \off+8(%rdi)
        sbbq    %rax, %rdx
.endr
        /* word 31 */
        movq    248(%rsi), %rcx
        movq    248(%r8), %rax
        pextq   %r14, %rcx, %rcx
        movq    %rdx, 240(%rdi)
        sbbq    %rcx, %rax
        movq    %rax, 248(%rdi)
        popq    %r14
        popq    %r13
        popq    %r12
        repz retq
#ifndef __APPLE__
.size sp_2048_mont_reduce_avx2_32,.-sp_2048_mont_reduce_avx2_32
#endif /* __APPLE__ */
  12207. #endif /* HAVE_INTEL_AVX2 */
  /* Conditionally add b to a using the mask m.  (r = a + (b & m))
   * m is -1 to add and 0 when not.
   *
   * Operands are 16 x 64-bit words, least significant word first.
   *
   * r  (%rdi) Result of the conditional add.
   * a  (%rsi) Number to add to.
   * b  (%rdx) Number to add.
   * m  (%rcx) Mask value to apply to every word of b.
   * return (%rax) Carry out of the top word (0 or 1).
   *
   * Phase 1 copies b & m into a 128-byte scratch area on the stack.  Doing
   * all the masking up front keeps the andq instructions, which modify CF,
   * out of the add/adc carry chain of phase 2.
   * Phase 2 adds the scratch words to a.  %rdx is reused as a temporary
   * there, since b has been fully consumed; %r8 and %r9 alternate as the
   * result word.
   *
   * The .irp blocks expand to the fully unrolled instruction sequences.
   */
#ifndef __APPLE__
.text
.globl sp_2048_cond_add_16
.type sp_2048_cond_add_16,@function
.align 16
sp_2048_cond_add_16:
#else
.section __TEXT,__text
.globl _sp_2048_cond_add_16
.p2align 4
_sp_2048_cond_add_16:
#endif /* __APPLE__ */
        subq    $0x80, %rsp
        movq    $0x00, %rax
        /* phase 1: scratch[0..15] = b[0..15] & m, two words per expansion */
.irp off,0,16,32,48,64,80,96,112
        movq    \off(%rdx), %r8
        movq    \off+8(%rdx), %r9
        andq    %rcx, %r8
        andq    %rcx, %r9
        movq    %r8, \off(%rsp)
        movq    %r9, \off+8(%rsp)
.endr
        /* phase 2, word 0: starts the carry chain */
        movq    (%rsi), %r8
        movq    (%rsp), %rdx
        addq    %rdx, %r8
        /* words 1..14: off is the byte offset of the odd word of each pair;
         * the previous result is stored after the next adcq (movq leaves
         * the flags untouched) */
.irp off,8,24,40,56,72,88,104
        movq    \off(%rsi), %r9
        movq    \off(%rsp), %rdx
        adcq    %rdx, %r9
        movq    %r8, \off-8(%rdi)
        movq    \off+8(%rsi), %r8
        movq    \off+8(%rsp), %rdx
        adcq    %rdx, %r8
        movq    %r9, \off(%rdi)
.endr
        /* word 15 */
        movq    120(%rsi), %r9
        movq    120(%rsp), %rdx
        adcq    %rdx, %r9
        movq    %r8, 112(%rdi)
        movq    %r9, 120(%rdi)
        /* rax = final carry; must be taken before addq below resets CF */
        adcq    $0x00, %rax
        addq    $0x80, %rsp
        repz retq
#ifndef __APPLE__
.size sp_2048_cond_add_16,.-sp_2048_cond_add_16
#endif /* __APPLE__ */
  /* Conditionally add b to a using the mask m, BMI2 (pext) variant.
   * m is -1 to add and 0 when not.
   *
   * Operands are 16 x 64-bit words, least significant word first.
   *
   * r  (%rdi) Result of the conditional add.
   * a  (%rsi) Number to add to.
   * b  (%rdx) Number to add.
   * m  (%rcx) Mask value to apply to every word of b.
   * return (%rax) Carry out of the top word (0 or 1).
   *
   * Each word of b is masked with pextq: a mask of all ones returns the word
   * unchanged and a mask of zero returns 0.  pextq does not modify the
   * flags, so the masking can be interleaved with the add/adc carry chain
   * and no stack scratch area is needed.
   *
   * %r8, %r9 and %r10 rotate through the roles "masked b word", "a word /
   * result" and "previous result waiting to be stored" with period 3; the
   * .irp block covers words 1..15 three at a time and expands to the fully
   * unrolled sequence.
   */
#ifndef __APPLE__
.text
.globl sp_2048_cond_add_avx2_16
.type sp_2048_cond_add_avx2_16,@function
.align 16
sp_2048_cond_add_avx2_16:
#else
.section __TEXT,__text
.globl _sp_2048_cond_add_avx2_16
.p2align 4
_sp_2048_cond_add_avx2_16:
#endif /* __APPLE__ */
        movq    $0x00, %rax
        /* word 0: starts the carry chain */
        movq    (%rdx), %r10
        movq    (%rsi), %r8
        pextq   %rcx, %r10, %r10
        addq    %r10, %r8
        /* words 1..15: off is the byte offset of the first word of each
         * group of three */
.irp off,8,32,56,80,104
        movq    \off(%rdx), %r10
        movq    \off(%rsi), %r9
        pextq   %rcx, %r10, %r10
        movq    %r8, \off-8(%rdi)
        adcq    %r10, %r9
        movq    \off+8(%rdx), %r8
        movq    \off+8(%rsi), %r10
        pextq   %rcx, %r8, %r8
        movq    %r9, \off(%rdi)
        adcq    %r8, %r10
        movq    \off+16(%rdx), %r9
        movq    \off+16(%rsi), %r8
        pextq   %rcx, %r9, %r9
        movq    %r10, \off+8(%rdi)
        adcq    %r9, %r8
.endr
        movq    %r8, 120(%rdi)
        /* rax = final carry */
        adcq    $0x00, %rax
        repz retq
#ifndef __APPLE__
.size sp_2048_cond_add_avx2_16,.-sp_2048_cond_add_avx2_16
#endif /* __APPLE__ */
  12454. /* Shift number left by n bits. (r = a << n)
  12455. *
  12456. * r Result of left shift by n.
  12457. * a Number to shift.
  12458. * n Amount to shift.
  12459. */
  12460. #ifndef __APPLE__
  12461. .text
  12462. .globl sp_2048_lshift_32
  12463. .type sp_2048_lshift_32,@function
  12464. .align 16
  12465. sp_2048_lshift_32:
  12466. #else
  12467. .section __TEXT,__text
  12468. .globl _sp_2048_lshift_32
  12469. .p2align 4
  12470. _sp_2048_lshift_32:
  12471. #endif /* __APPLE__ */
  12472. movb %dl, %cl /* cl = n (shift count); copied out first because rdx is reused as scratch below */
  12473. movq $0x00, %r10 /* r10 starts at zero and receives the bits shifted out of the top word */
  12474. movq 216(%rsi), %r11 /* a[27]: only feeds bits into r[28] here, finished in the next group */
  12475. movq 224(%rsi), %rdx /* a[28] */
  12476. movq 232(%rsi), %rax /* a[29] */
  12477. movq 240(%rsi), %r8 /* a[30] */
  12478. movq 248(%rsi), %r9 /* a[31], most significant input word */
  12479. shldq %cl, %r9, %r10 /* r10 = high bits of a[31] that spill into an extra word */
  12480. shldq %cl, %r8, %r9 /* each shld shifts the word left and pulls in the top bits of the word below */
  12481. shldq %cl, %rax, %r8
  12482. shldq %cl, %rdx, %rax
  12483. shldq %cl, %r11, %rdx /* r11 itself is left unshifted for the next group */
  12484. movq %rdx, 224(%rdi) /* r[28] */
  12485. movq %rax, 232(%rdi) /* r[29] */
  12486. movq %r8, 240(%rdi) /* r[30] */
  12487. movq %r9, 248(%rdi) /* r[31] */
  12488. movq %r10, 256(%rdi) /* r[32]: the result is written as 33 words, one more than the 32 read */
  12489. movq 184(%rsi), %r9 /* a[23]: carry-over word for the following group */
  12490. movq 192(%rsi), %rdx /* a[24] */
  12491. movq 200(%rsi), %rax /* a[25] */
  12492. movq 208(%rsi), %r8 /* a[26] */
  12493. shldq %cl, %r8, %r11 /* finish a[27] now that a[26] is loaded */
  12494. shldq %cl, %rax, %r8
  12495. shldq %cl, %rdx, %rax
  12496. shldq %cl, %r9, %rdx
  12497. movq %rdx, 192(%rdi) /* r[24] */
  12498. movq %rax, 200(%rdi) /* r[25] */
  12499. movq %r8, 208(%rdi) /* r[26] */
  12500. movq %r11, 216(%rdi) /* r[27] */
  12501. movq 152(%rsi), %r11 /* a[19]: carry-over word; r9 and r11 alternate in this role */
  12502. movq 160(%rsi), %rdx /* a[20] */
  12503. movq 168(%rsi), %rax /* a[21] */
  12504. movq 176(%rsi), %r8 /* a[22] */
  12505. shldq %cl, %r8, %r9 /* finish a[23] */
  12506. shldq %cl, %rax, %r8
  12507. shldq %cl, %rdx, %rax
  12508. shldq %cl, %r11, %rdx
  12509. movq %rdx, 160(%rdi) /* r[20] */
  12510. movq %rax, 168(%rdi) /* r[21] */
  12511. movq %r8, 176(%rdi) /* r[22] */
  12512. movq %r9, 184(%rdi) /* r[23] */
  12513. movq 120(%rsi), %r9 /* a[15]: carry-over word */
  12514. movq 128(%rsi), %rdx /* a[16] */
  12515. movq 136(%rsi), %rax /* a[17] */
  12516. movq 144(%rsi), %r8 /* a[18] */
  12517. shldq %cl, %r8, %r11 /* finish a[19] */
  12518. shldq %cl, %rax, %r8
  12519. shldq %cl, %rdx, %rax
  12520. shldq %cl, %r9, %rdx
  12521. movq %rdx, 128(%rdi) /* r[16] */
  12522. movq %rax, 136(%rdi) /* r[17] */
  12523. movq %r8, 144(%rdi) /* r[18] */
  12524. movq %r11, 152(%rdi) /* r[19] */
  12525. movq 88(%rsi), %r11 /* a[11]: carry-over word */
  12526. movq 96(%rsi), %rdx /* a[12] */
  12527. movq 104(%rsi), %rax /* a[13] */
  12528. movq 112(%rsi), %r8 /* a[14] */
  12529. shldq %cl, %r8, %r9 /* finish a[15] */
  12530. shldq %cl, %rax, %r8
  12531. shldq %cl, %rdx, %rax
  12532. shldq %cl, %r11, %rdx
  12533. movq %rdx, 96(%rdi) /* r[12] */
  12534. movq %rax, 104(%rdi) /* r[13] */
  12535. movq %r8, 112(%rdi) /* r[14] */
  12536. movq %r9, 120(%rdi) /* r[15] */
  12537. movq 56(%rsi), %r9 /* a[7]: carry-over word */
  12538. movq 64(%rsi), %rdx /* a[8] */
  12539. movq 72(%rsi), %rax /* a[9] */
  12540. movq 80(%rsi), %r8 /* a[10] */
  12541. shldq %cl, %r8, %r11 /* finish a[11] */
  12542. shldq %cl, %rax, %r8
  12543. shldq %cl, %rdx, %rax
  12544. shldq %cl, %r9, %rdx
  12545. movq %rdx, 64(%rdi) /* r[8] */
  12546. movq %rax, 72(%rdi) /* r[9] */
  12547. movq %r8, 80(%rdi) /* r[10] */
  12548. movq %r11, 88(%rdi) /* r[11] */
  12549. movq 24(%rsi), %r11 /* a[3]: carry-over word */
  12550. movq 32(%rsi), %rdx /* a[4] */
  12551. movq 40(%rsi), %rax /* a[5] */
  12552. movq 48(%rsi), %r8 /* a[6] */
  12553. shldq %cl, %r8, %r9 /* finish a[7] */
  12554. shldq %cl, %rax, %r8
  12555. shldq %cl, %rdx, %rax
  12556. shldq %cl, %r11, %rdx
  12557. movq %rdx, 32(%rdi) /* r[4] */
  12558. movq %rax, 40(%rdi) /* r[5] */
  12559. movq %r8, 48(%rdi) /* r[6] */
  12560. movq %r9, 56(%rdi) /* r[7] */
  12561. movq (%rsi), %rdx /* a[0], least significant input word */
  12562. movq 8(%rsi), %rax /* a[1] */
  12563. movq 16(%rsi), %r8 /* a[2] */
  12564. shldq %cl, %r8, %r11 /* finish a[3] */
  12565. shldq %cl, %rax, %r8
  12566. shldq %cl, %rdx, %rax
  12567. shlq %cl, %rdx /* lowest word has nothing below it, so a plain shift fills with zeros */
  12568. movq %rdx, (%rdi) /* r[0] */
  12569. movq %rax, 8(%rdi) /* r[1] */
  12570. movq %r8, 16(%rdi) /* r[2] */
  12571. movq %r11, 24(%rdi) /* r[3] */
  12572. repz retq
/* Record the symbol size for ELF targets; the label was declared with .type ...,@function above. */
#ifndef __APPLE__
.size sp_2048_lshift_32,.-sp_2048_lshift_32
#endif /* __APPLE__ */
  12573. #endif /* !WOLFSSL_SP_NO_2048 */
  12574. #endif /* !WOLFSSL_SP_NO_2048 */
  12575. #ifndef WOLFSSL_SP_NO_3072
  12576. #ifndef WOLFSSL_SP_NO_3072
  12577. /* Read big endian unsigned byte array into r.
  12578. * Uses the bswap instruction.
  12579. *
  12580. * r A single precision integer.
  12581. * size Maximum number of bytes to convert
  12582. * a Byte array.
  12583. * n Number of bytes in array to read.
  12584. */
  12585. #ifndef __APPLE__
  12586. .text
  12587. .globl sp_3072_from_bin_bswap
  12588. .type sp_3072_from_bin_bswap,@function
  12589. .align 16
  12590. sp_3072_from_bin_bswap:
  12591. #else
  12592. .section __TEXT,__text
  12593. .globl _sp_3072_from_bin_bswap
  12594. .p2align 4
  12595. _sp_3072_from_bin_bswap:
  12596. #endif /* __APPLE__ */
  12597. movq %rdx, %r9 /* r9 = a (byte array); rdx keeps pointing at the first byte for the tail loop */
  12598. movq %rdi, %r10 /* r10 = r (output words) */
  12599. addq %rcx, %r9 /* r9 = a + n, one past the last input byte */
  12600. addq $0x180, %r10 /* r10 = r + 0x180 (384 bytes): end of output; the size argument in rsi is not read */
  12601. xorq %r11, %r11 /* r11 = 0, used as the zero constant below */
  12602. jmp L_3072_from_bin_bswap_64_end /* test the loop condition before the first iteration */
  12603. L_3072_from_bin_bswap_64_start:
  12604. subq $0x40, %r9 /* step back 64 bytes from the end of the unread input */
  12605. movq 56(%r9), %rax /* last 8 bytes of this chunk */
  12606. movq 48(%r9), %r8
  12607. bswapq %rax /* reverse byte order of the loaded word */
  12608. bswapq %r8
  12609. movq %rax, (%rdi) /* bytes nearest the end of the input go to the lowest output word */
  12610. movq %r8, 8(%rdi)
  12611. movq 40(%r9), %rax
  12612. movq 32(%r9), %r8
  12613. bswapq %rax
  12614. bswapq %r8
  12615. movq %rax, 16(%rdi)
  12616. movq %r8, 24(%rdi)
  12617. movq 24(%r9), %rax
  12618. movq 16(%r9), %r8
  12619. bswapq %rax
  12620. bswapq %r8
  12621. movq %rax, 32(%rdi)
  12622. movq %r8, 40(%rdi)
  12623. movq 8(%r9), %rax
  12624. movq (%r9), %r8
  12625. bswapq %rax
  12626. bswapq %r8
  12627. movq %rax, 48(%rdi)
  12628. movq %r8, 56(%rdi)
  12629. addq $0x40, %rdi /* advance output by 8 words */
  12630. subq $0x40, %rcx /* 64 fewer input bytes remain */
  12631. L_3072_from_bin_bswap_64_end:
  12632. cmpq $63, %rcx
  12633. jg L_3072_from_bin_bswap_64_start /* signed compare: loop while more than 63 bytes remain */
  12634. jmp L_3072_from_bin_bswap_8_end
  12635. L_3072_from_bin_bswap_8_start:
  12636. subq $8, %r9 /* same scheme, one 8-byte word at a time */
  12637. movq (%r9), %rax
  12638. bswapq %rax
  12639. movq %rax, (%rdi)
  12640. addq $8, %rdi
  12641. subq $8, %rcx
  12642. L_3072_from_bin_bswap_8_end:
  12643. cmpq $7, %rcx
  12644. jg L_3072_from_bin_bswap_8_start /* signed compare: loop while more than 7 bytes remain */
  12645. cmpq %r11, %rcx /* r11 is zero: any bytes left over? */
  12646. je L_3072_from_bin_bswap_hi_end /* none left: skip the partial word */
  12647. movq %r11, %r8 /* r8 = 0, accumulates the partial top word */
  12648. movq %r11, %rax /* clear rax so that writing al yields a zero-extended byte */
  12649. L_3072_from_bin_bswap_hi_start:
  12650. movb (%rdx), %al /* next leading byte, read forward from the start of a */
  12651. shlq $8, %r8 /* make room in the accumulator */
  12652. incq %rdx
  12653. addq %rax, %r8 /* append the byte as the new low 8 bits */
  12654. decq %rcx
  12655. jg L_3072_from_bin_bswap_hi_start /* uses the flags from decq: loop while rcx > 0 */
  12656. movq %r8, (%rdi) /* store the partial word */
  12657. addq $8, %rdi
  12658. L_3072_from_bin_bswap_hi_end:
  12659. cmpq %r10, %rdi
  12660. je L_3072_from_bin_bswap_zero_end /* output already reaches r + 0x180: nothing to clear */
  12661. L_3072_from_bin_bswap_zero_start:
  12662. movq %r11, (%rdi) /* zero the remaining output words */
  12663. addq $8, %rdi
  12664. cmpq %r10, %rdi
  12665. jl L_3072_from_bin_bswap_zero_start /* signed compare of pointers: continue while rdi < r + 0x180 */
  12666. L_3072_from_bin_bswap_zero_end:
  12667. repz retq
  12668. #ifndef __APPLE__
  12669. .size sp_3072_from_bin_bswap,.-sp_3072_from_bin_bswap
  12670. #endif /* __APPLE__ */
  12671. /* Read big endian unsigned byte array into r.
  12672. * Uses the movbe instruction which is an optional instruction.
  12673. *
  12674. * r A single precision integer.
  12675. * size Maximum number of bytes to convert
  12676. * a Byte array.
  12677. * n Number of bytes in array to read.
  12678. */
  12679. #ifndef __APPLE__
  12680. .text
  12681. .globl sp_3072_from_bin_movbe
  12682. .type sp_3072_from_bin_movbe,@function
  12683. .align 16
  12684. sp_3072_from_bin_movbe:
  12685. #else
  12686. .section __TEXT,__text
  12687. .globl _sp_3072_from_bin_movbe
  12688. .p2align 4
  12689. _sp_3072_from_bin_movbe:
  12690. #endif /* __APPLE__ */
  12691. movq %rdx, %r9 /* r9 = a (byte array); rdx keeps pointing at the first byte for the tail loop */
  12692. movq %rdi, %r10 /* r10 = r (output words) */
  12693. addq %rcx, %r9 /* r9 = a + n, one past the last input byte */
  12694. addq $0x180, %r10 /* r10 = r + 0x180 (384 bytes): end of output; the size argument in rsi is not read */
  12695. xorq %r11, %r11 /* r11 = 0, used as the zero constant below */
  12696. jmp L_3072_from_bin_movbe_64_end /* test the loop condition before the first iteration */
  12697. L_3072_from_bin_movbe_64_start:
  12698. subq $0x40, %r9 /* step back 64 bytes from the end of the unread input */
  12699. movbeq 56(%r9), %rax /* movbe loads and byte-reverses in one instruction */
  12700. movbeq 48(%r9), %r8
  12701. movq %rax, (%rdi) /* bytes nearest the end of the input go to the lowest output word */
  12702. movq %r8, 8(%rdi)
  12703. movbeq 40(%r9), %rax
  12704. movbeq 32(%r9), %r8
  12705. movq %rax, 16(%rdi)
  12706. movq %r8, 24(%rdi)
  12707. movbeq 24(%r9), %rax
  12708. movbeq 16(%r9), %r8
  12709. movq %rax, 32(%rdi)
  12710. movq %r8, 40(%rdi)
  12711. movbeq 8(%r9), %rax
  12712. movbeq (%r9), %r8
  12713. movq %rax, 48(%rdi)
  12714. movq %r8, 56(%rdi)
  12715. addq $0x40, %rdi /* advance output by 8 words */
  12716. subq $0x40, %rcx /* 64 fewer input bytes remain */
  12717. L_3072_from_bin_movbe_64_end:
  12718. cmpq $63, %rcx
  12719. jg L_3072_from_bin_movbe_64_start /* signed compare: loop while more than 63 bytes remain */
  12720. jmp L_3072_from_bin_movbe_8_end
  12721. L_3072_from_bin_movbe_8_start:
  12722. subq $8, %r9 /* same scheme, one 8-byte word at a time */
  12723. movbeq (%r9), %rax
  12724. movq %rax, (%rdi)
  12725. addq $8, %rdi
  12726. subq $8, %rcx
  12727. L_3072_from_bin_movbe_8_end:
  12728. cmpq $7, %rcx
  12729. jg L_3072_from_bin_movbe_8_start /* signed compare: loop while more than 7 bytes remain */
  12730. cmpq %r11, %rcx /* r11 is zero: any bytes left over? */
  12731. je L_3072_from_bin_movbe_hi_end /* none left: skip the partial word */
  12732. movq %r11, %r8 /* r8 = 0, accumulates the partial top word */
  12733. movq %r11, %rax /* clear rax so that writing al yields a zero-extended byte */
  12734. L_3072_from_bin_movbe_hi_start:
  12735. movb (%rdx), %al /* next leading byte, read forward from the start of a */
  12736. shlq $8, %r8 /* make room in the accumulator */
  12737. incq %rdx
  12738. addq %rax, %r8 /* append the byte as the new low 8 bits */
  12739. decq %rcx
  12740. jg L_3072_from_bin_movbe_hi_start /* uses the flags from decq: loop while rcx > 0 */
  12741. movq %r8, (%rdi) /* store the partial word */
  12742. addq $8, %rdi
  12743. L_3072_from_bin_movbe_hi_end:
  12744. cmpq %r10, %rdi
  12745. je L_3072_from_bin_movbe_zero_end /* output already reaches r + 0x180: nothing to clear */
  12746. L_3072_from_bin_movbe_zero_start:
  12747. movq %r11, (%rdi) /* zero the remaining output words */
  12748. addq $8, %rdi
  12749. cmpq %r10, %rdi
  12750. jl L_3072_from_bin_movbe_zero_start /* signed compare of pointers: continue while rdi < r + 0x180 */
  12751. L_3072_from_bin_movbe_zero_end:
  12752. repz retq
  12753. #ifndef __APPLE__
  12754. .size sp_3072_from_bin_movbe,.-sp_3072_from_bin_movbe
  12755. #endif /* __APPLE__ */
  12756. /* Write r as big endian to byte array.
  12757. * Fixed length number of bytes written: 384
  12758. * Uses the bswap instruction.
  12759. *
  12760. * r A single precision integer.
  12761. * a Byte array.
  12762. */
  12763. #ifndef __APPLE__
  12764. .text
  12765. .globl sp_3072_to_bin_bswap
  12766. .type sp_3072_to_bin_bswap,@function
  12767. .align 16
  12768. sp_3072_to_bin_bswap:
  12769. #else
  12770. .section __TEXT,__text
  12771. .globl _sp_3072_to_bin_bswap
  12772. .p2align 4
  12773. _sp_3072_to_bin_bswap:
  12774. #endif /* __APPLE__ */
  12775. movq 376(%rdi), %rdx /* r[47], the word at the highest offset, is emitted first */
  12776. movq 368(%rdi), %rax /* r[46] */
  12777. bswapq %rdx /* reverse byte order of each word before storing */
  12778. bswapq %rax
  12779. movq %rdx, (%rsi) /* output bytes 0..7 */
  12780. movq %rax, 8(%rsi) /* output bytes 8..15 */
  12781. movq 360(%rdi), %rdx /* the pattern repeats: source offset falls by 16, destination rises by 16 */
  12782. movq 352(%rdi), %rax
  12783. bswapq %rdx
  12784. bswapq %rax
  12785. movq %rdx, 16(%rsi)
  12786. movq %rax, 24(%rsi)
  12787. movq 344(%rdi), %rdx
  12788. movq 336(%rdi), %rax
  12789. bswapq %rdx
  12790. bswapq %rax
  12791. movq %rdx, 32(%rsi)
  12792. movq %rax, 40(%rsi)
  12793. movq 328(%rdi), %rdx
  12794. movq 320(%rdi), %rax
  12795. bswapq %rdx
  12796. bswapq %rax
  12797. movq %rdx, 48(%rsi)
  12798. movq %rax, 56(%rsi)
  12799. movq 312(%rdi), %rdx
  12800. movq 304(%rdi), %rax
  12801. bswapq %rdx
  12802. bswapq %rax
  12803. movq %rdx, 64(%rsi)
  12804. movq %rax, 72(%rsi)
  12805. movq 296(%rdi), %rdx
  12806. movq 288(%rdi), %rax
  12807. bswapq %rdx
  12808. bswapq %rax
  12809. movq %rdx, 80(%rsi)
  12810. movq %rax, 88(%rsi)
  12811. movq 280(%rdi), %rdx
  12812. movq 272(%rdi), %rax
  12813. bswapq %rdx
  12814. bswapq %rax
  12815. movq %rdx, 96(%rsi)
  12816. movq %rax, 104(%rsi)
  12817. movq 264(%rdi), %rdx
  12818. movq 256(%rdi), %rax
  12819. bswapq %rdx
  12820. bswapq %rax
  12821. movq %rdx, 112(%rsi)
  12822. movq %rax, 120(%rsi)
  12823. movq 248(%rdi), %rdx
  12824. movq 240(%rdi), %rax
  12825. bswapq %rdx
  12826. bswapq %rax
  12827. movq %rdx, 128(%rsi)
  12828. movq %rax, 136(%rsi)
  12829. movq 232(%rdi), %rdx
  12830. movq 224(%rdi), %rax
  12831. bswapq %rdx
  12832. bswapq %rax
  12833. movq %rdx, 144(%rsi)
  12834. movq %rax, 152(%rsi)
  12835. movq 216(%rdi), %rdx
  12836. movq 208(%rdi), %rax
  12837. bswapq %rdx
  12838. bswapq %rax
  12839. movq %rdx, 160(%rsi)
  12840. movq %rax, 168(%rsi)
  12841. movq 200(%rdi), %rdx
  12842. movq 192(%rdi), %rax
  12843. bswapq %rdx
  12844. bswapq %rax
  12845. movq %rdx, 176(%rsi)
  12846. movq %rax, 184(%rsi)
  12847. movq 184(%rdi), %rdx /* halfway: r[23] goes to output bytes 192..199 */
  12848. movq 176(%rdi), %rax
  12849. bswapq %rdx
  12850. bswapq %rax
  12851. movq %rdx, 192(%rsi)
  12852. movq %rax, 200(%rsi)
  12853. movq 168(%rdi), %rdx
  12854. movq 160(%rdi), %rax
  12855. bswapq %rdx
  12856. bswapq %rax
  12857. movq %rdx, 208(%rsi)
  12858. movq %rax, 216(%rsi)
  12859. movq 152(%rdi), %rdx
  12860. movq 144(%rdi), %rax
  12861. bswapq %rdx
  12862. bswapq %rax
  12863. movq %rdx, 224(%rsi)
  12864. movq %rax, 232(%rsi)
  12865. movq 136(%rdi), %rdx
  12866. movq 128(%rdi), %rax
  12867. bswapq %rdx
  12868. bswapq %rax
  12869. movq %rdx, 240(%rsi)
  12870. movq %rax, 248(%rsi)
  12871. movq 120(%rdi), %rdx
  12872. movq 112(%rdi), %rax
  12873. bswapq %rdx
  12874. bswapq %rax
  12875. movq %rdx, 256(%rsi)
  12876. movq %rax, 264(%rsi)
  12877. movq 104(%rdi), %rdx
  12878. movq 96(%rdi), %rax
  12879. bswapq %rdx
  12880. bswapq %rax
  12881. movq %rdx, 272(%rsi)
  12882. movq %rax, 280(%rsi)
  12883. movq 88(%rdi), %rdx
  12884. movq 80(%rdi), %rax
  12885. bswapq %rdx
  12886. bswapq %rax
  12887. movq %rdx, 288(%rsi)
  12888. movq %rax, 296(%rsi)
  12889. movq 72(%rdi), %rdx
  12890. movq 64(%rdi), %rax
  12891. bswapq %rdx
  12892. bswapq %rax
  12893. movq %rdx, 304(%rsi)
  12894. movq %rax, 312(%rsi)
  12895. movq 56(%rdi), %rdx
  12896. movq 48(%rdi), %rax
  12897. bswapq %rdx
  12898. bswapq %rax
  12899. movq %rdx, 320(%rsi)
  12900. movq %rax, 328(%rsi)
  12901. movq 40(%rdi), %rdx
  12902. movq 32(%rdi), %rax
  12903. bswapq %rdx
  12904. bswapq %rax
  12905. movq %rdx, 336(%rsi)
  12906. movq %rax, 344(%rsi)
  12907. movq 24(%rdi), %rdx
  12908. movq 16(%rdi), %rax
  12909. bswapq %rdx
  12910. bswapq %rax
  12911. movq %rdx, 352(%rsi)
  12912. movq %rax, 360(%rsi)
  12913. movq 8(%rdi), %rdx /* r[1] */
  12914. movq (%rdi), %rax /* r[0], the word at offset 0, is emitted last */
  12915. bswapq %rdx
  12916. bswapq %rax
  12917. movq %rdx, 368(%rsi)
  12918. movq %rax, 376(%rsi) /* output bytes 376..383: 384 bytes written in total */
  12919. repz retq
  12920. #ifndef __APPLE__
  12921. .size sp_3072_to_bin_bswap,.-sp_3072_to_bin_bswap
  12922. #endif /* __APPLE__ */
  12923. /* Write r as big endian to byte array.
  12924. * Fixed length number of bytes written: 384
  12925. * Uses the movbe instruction which is optional.
  12926. *
  12927. * r A single precision integer.
  12928. * a Byte array.
  12929. */
  12930. #ifndef __APPLE__
  12931. .text
  12932. .globl sp_3072_to_bin_movbe
  12933. .type sp_3072_to_bin_movbe,@function
  12934. .align 16
  12935. sp_3072_to_bin_movbe:
  12936. #else
  12937. .section __TEXT,__text
  12938. .globl _sp_3072_to_bin_movbe
  12939. .p2align 4
  12940. _sp_3072_to_bin_movbe:
  12941. #endif /* __APPLE__ */
  12942. movbeq 376(%rdi), %rdx /* r[47], the word at the highest offset, is emitted first; movbe byte-reverses on load */
  12943. movbeq 368(%rdi), %rax /* r[46] */
  12944. movq %rdx, (%rsi) /* output bytes 0..7 */
  12945. movq %rax, 8(%rsi) /* output bytes 8..15 */
  12946. movbeq 360(%rdi), %rdx /* the pattern repeats: source offset falls by 16, destination rises by 16 */
  12947. movbeq 352(%rdi), %rax
  12948. movq %rdx, 16(%rsi)
  12949. movq %rax, 24(%rsi)
  12950. movbeq 344(%rdi), %rdx
  12951. movbeq 336(%rdi), %rax
  12952. movq %rdx, 32(%rsi)
  12953. movq %rax, 40(%rsi)
  12954. movbeq 328(%rdi), %rdx
  12955. movbeq 320(%rdi), %rax
  12956. movq %rdx, 48(%rsi)
  12957. movq %rax, 56(%rsi)
  12958. movbeq 312(%rdi), %rdx
  12959. movbeq 304(%rdi), %rax
  12960. movq %rdx, 64(%rsi)
  12961. movq %rax, 72(%rsi)
  12962. movbeq 296(%rdi), %rdx
  12963. movbeq 288(%rdi), %rax
  12964. movq %rdx, 80(%rsi)
  12965. movq %rax, 88(%rsi)
  12966. movbeq 280(%rdi), %rdx
  12967. movbeq 272(%rdi), %rax
  12968. movq %rdx, 96(%rsi)
  12969. movq %rax, 104(%rsi)
  12970. movbeq 264(%rdi), %rdx
  12971. movbeq 256(%rdi), %rax
  12972. movq %rdx, 112(%rsi)
  12973. movq %rax, 120(%rsi)
  12974. movbeq 248(%rdi), %rdx
  12975. movbeq 240(%rdi), %rax
  12976. movq %rdx, 128(%rsi)
  12977. movq %rax, 136(%rsi)
  12978. movbeq 232(%rdi), %rdx
  12979. movbeq 224(%rdi), %rax
  12980. movq %rdx, 144(%rsi)
  12981. movq %rax, 152(%rsi)
  12982. movbeq 216(%rdi), %rdx
  12983. movbeq 208(%rdi), %rax
  12984. movq %rdx, 160(%rsi)
  12985. movq %rax, 168(%rsi)
  12986. movbeq 200(%rdi), %rdx
  12987. movbeq 192(%rdi), %rax
  12988. movq %rdx, 176(%rsi)
  12989. movq %rax, 184(%rsi)
  12990. movbeq 184(%rdi), %rdx /* halfway: r[23] goes to output bytes 192..199 */
  12991. movbeq 176(%rdi), %rax
  12992. movq %rdx, 192(%rsi)
  12993. movq %rax, 200(%rsi)
  12994. movbeq 168(%rdi), %rdx
  12995. movbeq 160(%rdi), %rax
  12996. movq %rdx, 208(%rsi)
  12997. movq %rax, 216(%rsi)
  12998. movbeq 152(%rdi), %rdx
  12999. movbeq 144(%rdi), %rax
  13000. movq %rdx, 224(%rsi)
  13001. movq %rax, 232(%rsi)
  13002. movbeq 136(%rdi), %rdx
  13003. movbeq 128(%rdi), %rax
  13004. movq %rdx, 240(%rsi)
  13005. movq %rax, 248(%rsi)
  13006. movbeq 120(%rdi), %rdx
  13007. movbeq 112(%rdi), %rax
  13008. movq %rdx, 256(%rsi)
  13009. movq %rax, 264(%rsi)
  13010. movbeq 104(%rdi), %rdx
  13011. movbeq 96(%rdi), %rax
  13012. movq %rdx, 272(%rsi)
  13013. movq %rax, 280(%rsi)
  13014. movbeq 88(%rdi), %rdx
  13015. movbeq 80(%rdi), %rax
  13016. movq %rdx, 288(%rsi)
  13017. movq %rax, 296(%rsi)
  13018. movbeq 72(%rdi), %rdx
  13019. movbeq 64(%rdi), %rax
  13020. movq %rdx, 304(%rsi)
  13021. movq %rax, 312(%rsi)
  13022. movbeq 56(%rdi), %rdx
  13023. movbeq 48(%rdi), %rax
  13024. movq %rdx, 320(%rsi)
  13025. movq %rax, 328(%rsi)
  13026. movbeq 40(%rdi), %rdx
  13027. movbeq 32(%rdi), %rax
  13028. movq %rdx, 336(%rsi)
  13029. movq %rax, 344(%rsi)
  13030. movbeq 24(%rdi), %rdx
  13031. movbeq 16(%rdi), %rax
  13032. movq %rdx, 352(%rsi)
  13033. movq %rax, 360(%rsi)
  13034. movbeq 8(%rdi), %rdx /* r[1] */
  13035. movbeq (%rdi), %rax /* r[0], the word at offset 0, is emitted last */
  13036. movq %rdx, 368(%rsi)
  13037. movq %rax, 376(%rsi) /* output bytes 376..383: 384 bytes written in total */
  13038. repz retq
  13039. #ifndef __APPLE__
  13040. .size sp_3072_to_bin_movbe,.-sp_3072_to_bin_movbe
  13041. #endif /* __APPLE__ */
  13042. /* Multiply a and b into r. (r = a * b)
  13043. *
  13044. * r A single precision integer.
  13045. * a A single precision integer.
  13046. * b A single precision integer.
  13047. */
  13048. #ifndef __APPLE__
  13049. .text
  13050. .globl sp_3072_mul_12
  13051. .type sp_3072_mul_12,@function
  13052. .align 16
  13053. sp_3072_mul_12:
  13054. #else
  13055. .section __TEXT,__text
  13056. .globl _sp_3072_mul_12
  13057. .p2align 4
  13058. _sp_3072_mul_12:
  13059. #endif /* __APPLE__ */
  13060. movq %rdx, %rcx
  13061. subq $0x60, %rsp
  13062. # A[0] * B[0]
  13063. movq (%rcx), %rax
  13064. mulq (%rsi)
  13065. xorq %r10, %r10
  13066. movq %rax, (%rsp)
  13067. movq %rdx, %r9
  13068. # A[0] * B[1]
  13069. movq 8(%rcx), %rax
  13070. mulq (%rsi)
  13071. xorq %r8, %r8
  13072. addq %rax, %r9
  13073. adcq %rdx, %r10
  13074. adcq $0x00, %r8
  13075. # A[1] * B[0]
  13076. movq (%rcx), %rax
  13077. mulq 8(%rsi)
  13078. addq %rax, %r9
  13079. adcq %rdx, %r10
  13080. adcq $0x00, %r8
  13081. movq %r9, 8(%rsp)
  13082. # A[0] * B[2]
  13083. movq 16(%rcx), %rax
  13084. mulq (%rsi)
  13085. xorq %r9, %r9
  13086. addq %rax, %r10
  13087. adcq %rdx, %r8
  13088. adcq $0x00, %r9
  13089. # A[1] * B[1]
  13090. movq 8(%rcx), %rax
  13091. mulq 8(%rsi)
  13092. addq %rax, %r10
  13093. adcq %rdx, %r8
  13094. adcq $0x00, %r9
  13095. # A[2] * B[0]
  13096. movq (%rcx), %rax
  13097. mulq 16(%rsi)
  13098. addq %rax, %r10
  13099. adcq %rdx, %r8
  13100. adcq $0x00, %r9
  13101. movq %r10, 16(%rsp)
  13102. # A[0] * B[3]
  13103. movq 24(%rcx), %rax
  13104. mulq (%rsi)
  13105. xorq %r10, %r10
  13106. addq %rax, %r8
  13107. adcq %rdx, %r9
  13108. adcq $0x00, %r10
  13109. # A[1] * B[2]
  13110. movq 16(%rcx), %rax
  13111. mulq 8(%rsi)
  13112. addq %rax, %r8
  13113. adcq %rdx, %r9
  13114. adcq $0x00, %r10
  13115. # A[2] * B[1]
  13116. movq 8(%rcx), %rax
  13117. mulq 16(%rsi)
  13118. addq %rax, %r8
  13119. adcq %rdx, %r9
  13120. adcq $0x00, %r10
  13121. # A[3] * B[0]
  13122. movq (%rcx), %rax
  13123. mulq 24(%rsi)
  13124. addq %rax, %r8
  13125. adcq %rdx, %r9
  13126. adcq $0x00, %r10
  13127. movq %r8, 24(%rsp)
  13128. # A[0] * B[4]
  13129. movq 32(%rcx), %rax
  13130. mulq (%rsi)
  13131. xorq %r8, %r8
  13132. addq %rax, %r9
  13133. adcq %rdx, %r10
  13134. adcq $0x00, %r8
  13135. # A[1] * B[3]
  13136. movq 24(%rcx), %rax
  13137. mulq 8(%rsi)
  13138. addq %rax, %r9
  13139. adcq %rdx, %r10
  13140. adcq $0x00, %r8
  13141. # A[2] * B[2]
  13142. movq 16(%rcx), %rax
  13143. mulq 16(%rsi)
  13144. addq %rax, %r9
  13145. adcq %rdx, %r10
  13146. adcq $0x00, %r8
  13147. # A[3] * B[1]
  13148. movq 8(%rcx), %rax
  13149. mulq 24(%rsi)
  13150. addq %rax, %r9
  13151. adcq %rdx, %r10
  13152. adcq $0x00, %r8
  13153. # A[4] * B[0]
  13154. movq (%rcx), %rax
  13155. mulq 32(%rsi)
  13156. addq %rax, %r9
  13157. adcq %rdx, %r10
  13158. adcq $0x00, %r8
  13159. movq %r9, 32(%rsp)
  13160. # A[0] * B[5]
  13161. movq 40(%rcx), %rax
  13162. mulq (%rsi)
  13163. xorq %r9, %r9
  13164. addq %rax, %r10
  13165. adcq %rdx, %r8
  13166. adcq $0x00, %r9
  13167. # A[1] * B[4]
  13168. movq 32(%rcx), %rax
  13169. mulq 8(%rsi)
  13170. addq %rax, %r10
  13171. adcq %rdx, %r8
  13172. adcq $0x00, %r9
  13173. # A[2] * B[3]
  13174. movq 24(%rcx), %rax
  13175. mulq 16(%rsi)
  13176. addq %rax, %r10
  13177. adcq %rdx, %r8
  13178. adcq $0x00, %r9
  13179. # A[3] * B[2]
  13180. movq 16(%rcx), %rax
  13181. mulq 24(%rsi)
  13182. addq %rax, %r10
  13183. adcq %rdx, %r8
  13184. adcq $0x00, %r9
  13185. # A[4] * B[1]
  13186. movq 8(%rcx), %rax
  13187. mulq 32(%rsi)
  13188. addq %rax, %r10
  13189. adcq %rdx, %r8
  13190. adcq $0x00, %r9
  13191. # A[5] * B[0]
  13192. movq (%rcx), %rax
  13193. mulq 40(%rsi)
  13194. addq %rax, %r10
  13195. adcq %rdx, %r8
  13196. adcq $0x00, %r9
  13197. movq %r10, 40(%rsp)
  13198. # A[0] * B[6]
  13199. movq 48(%rcx), %rax
  13200. mulq (%rsi)
  13201. xorq %r10, %r10
  13202. addq %rax, %r8
  13203. adcq %rdx, %r9
  13204. adcq $0x00, %r10
  13205. # A[1] * B[5]
  13206. movq 40(%rcx), %rax
  13207. mulq 8(%rsi)
  13208. addq %rax, %r8
  13209. adcq %rdx, %r9
  13210. adcq $0x00, %r10
  13211. # A[2] * B[4]
  13212. movq 32(%rcx), %rax
  13213. mulq 16(%rsi)
  13214. addq %rax, %r8
  13215. adcq %rdx, %r9
  13216. adcq $0x00, %r10
  13217. # A[3] * B[3]
  13218. movq 24(%rcx), %rax
  13219. mulq 24(%rsi)
  13220. addq %rax, %r8
  13221. adcq %rdx, %r9
  13222. adcq $0x00, %r10
  13223. # A[4] * B[2]
  13224. movq 16(%rcx), %rax
  13225. mulq 32(%rsi)
  13226. addq %rax, %r8
  13227. adcq %rdx, %r9
  13228. adcq $0x00, %r10
  13229. # A[5] * B[1]
  13230. movq 8(%rcx), %rax
  13231. mulq 40(%rsi)
  13232. addq %rax, %r8
  13233. adcq %rdx, %r9
  13234. adcq $0x00, %r10
  13235. # A[6] * B[0]
  13236. movq (%rcx), %rax
  13237. mulq 48(%rsi)
  13238. addq %rax, %r8
  13239. adcq %rdx, %r9
  13240. adcq $0x00, %r10
  13241. movq %r8, 48(%rsp)
  13242. # A[0] * B[7]
  13243. movq 56(%rcx), %rax
  13244. mulq (%rsi)
  13245. xorq %r8, %r8
  13246. addq %rax, %r9
  13247. adcq %rdx, %r10
  13248. adcq $0x00, %r8
  13249. # A[1] * B[6]
  13250. movq 48(%rcx), %rax
  13251. mulq 8(%rsi)
  13252. addq %rax, %r9
  13253. adcq %rdx, %r10
  13254. adcq $0x00, %r8
  13255. # A[2] * B[5]
  13256. movq 40(%rcx), %rax
  13257. mulq 16(%rsi)
  13258. addq %rax, %r9
  13259. adcq %rdx, %r10
  13260. adcq $0x00, %r8
  13261. # A[3] * B[4]
  13262. movq 32(%rcx), %rax
  13263. mulq 24(%rsi)
  13264. addq %rax, %r9
  13265. adcq %rdx, %r10
  13266. adcq $0x00, %r8
  13267. # A[4] * B[3]
  13268. movq 24(%rcx), %rax
  13269. mulq 32(%rsi)
  13270. addq %rax, %r9
  13271. adcq %rdx, %r10
  13272. adcq $0x00, %r8
  13273. # A[5] * B[2]
  13274. movq 16(%rcx), %rax
  13275. mulq 40(%rsi)
  13276. addq %rax, %r9
  13277. adcq %rdx, %r10
  13278. adcq $0x00, %r8
  13279. # A[6] * B[1]
  13280. movq 8(%rcx), %rax
  13281. mulq 48(%rsi)
  13282. addq %rax, %r9
  13283. adcq %rdx, %r10
  13284. adcq $0x00, %r8
  13285. # A[7] * B[0]
  13286. movq (%rcx), %rax
  13287. mulq 56(%rsi)
  13288. addq %rax, %r9
  13289. adcq %rdx, %r10
  13290. adcq $0x00, %r8
  13291. movq %r9, 56(%rsp)
  13292. # A[0] * B[8]
  13293. movq 64(%rcx), %rax
  13294. mulq (%rsi)
  13295. xorq %r9, %r9
  13296. addq %rax, %r10
  13297. adcq %rdx, %r8
  13298. adcq $0x00, %r9
  13299. # A[1] * B[7]
  13300. movq 56(%rcx), %rax
  13301. mulq 8(%rsi)
  13302. addq %rax, %r10
  13303. adcq %rdx, %r8
  13304. adcq $0x00, %r9
  13305. # A[2] * B[6]
  13306. movq 48(%rcx), %rax
  13307. mulq 16(%rsi)
  13308. addq %rax, %r10
  13309. adcq %rdx, %r8
  13310. adcq $0x00, %r9
  13311. # A[3] * B[5]
  13312. movq 40(%rcx), %rax
  13313. mulq 24(%rsi)
  13314. addq %rax, %r10
  13315. adcq %rdx, %r8
  13316. adcq $0x00, %r9
  13317. # A[4] * B[4]
  13318. movq 32(%rcx), %rax
  13319. mulq 32(%rsi)
  13320. addq %rax, %r10
  13321. adcq %rdx, %r8
  13322. adcq $0x00, %r9
  13323. # A[5] * B[3]
  13324. movq 24(%rcx), %rax
  13325. mulq 40(%rsi)
  13326. addq %rax, %r10
  13327. adcq %rdx, %r8
  13328. adcq $0x00, %r9
  13329. # A[6] * B[2]
  13330. movq 16(%rcx), %rax
  13331. mulq 48(%rsi)
  13332. addq %rax, %r10
  13333. adcq %rdx, %r8
  13334. adcq $0x00, %r9
  13335. # A[7] * B[1]
  13336. movq 8(%rcx), %rax
  13337. mulq 56(%rsi)
  13338. addq %rax, %r10
  13339. adcq %rdx, %r8
  13340. adcq $0x00, %r9
  13341. # A[8] * B[0]
  13342. movq (%rcx), %rax
  13343. mulq 64(%rsi)
  13344. addq %rax, %r10
  13345. adcq %rdx, %r8
  13346. adcq $0x00, %r9
  13347. movq %r10, 64(%rsp)
  13348. # A[0] * B[9]
  13349. movq 72(%rcx), %rax
  13350. mulq (%rsi)
  13351. xorq %r10, %r10
  13352. addq %rax, %r8
  13353. adcq %rdx, %r9
  13354. adcq $0x00, %r10
  13355. # A[1] * B[8]
  13356. movq 64(%rcx), %rax
  13357. mulq 8(%rsi)
  13358. addq %rax, %r8
  13359. adcq %rdx, %r9
  13360. adcq $0x00, %r10
  13361. # A[2] * B[7]
  13362. movq 56(%rcx), %rax
  13363. mulq 16(%rsi)
  13364. addq %rax, %r8
  13365. adcq %rdx, %r9
  13366. adcq $0x00, %r10
  13367. # A[3] * B[6]
  13368. movq 48(%rcx), %rax
  13369. mulq 24(%rsi)
  13370. addq %rax, %r8
  13371. adcq %rdx, %r9
  13372. adcq $0x00, %r10
  13373. # A[4] * B[5]
  13374. movq 40(%rcx), %rax
  13375. mulq 32(%rsi)
  13376. addq %rax, %r8
  13377. adcq %rdx, %r9
  13378. adcq $0x00, %r10
  13379. # A[5] * B[4]
  13380. movq 32(%rcx), %rax
  13381. mulq 40(%rsi)
  13382. addq %rax, %r8
  13383. adcq %rdx, %r9
  13384. adcq $0x00, %r10
  13385. # A[6] * B[3]
  13386. movq 24(%rcx), %rax
  13387. mulq 48(%rsi)
  13388. addq %rax, %r8
  13389. adcq %rdx, %r9
  13390. adcq $0x00, %r10
  13391. # A[7] * B[2]
  13392. movq 16(%rcx), %rax
  13393. mulq 56(%rsi)
  13394. addq %rax, %r8
  13395. adcq %rdx, %r9
  13396. adcq $0x00, %r10
  13397. # A[8] * B[1]
  13398. movq 8(%rcx), %rax
  13399. mulq 64(%rsi)
  13400. addq %rax, %r8
  13401. adcq %rdx, %r9
  13402. adcq $0x00, %r10
  13403. # A[9] * B[0]
  13404. movq (%rcx), %rax
  13405. mulq 72(%rsi)
  13406. addq %rax, %r8
  13407. adcq %rdx, %r9
  13408. adcq $0x00, %r10
  13409. movq %r8, 72(%rsp)
  13410. # A[0] * B[10]
  13411. movq 80(%rcx), %rax
  13412. mulq (%rsi)
  13413. xorq %r8, %r8
  13414. addq %rax, %r9
  13415. adcq %rdx, %r10
  13416. adcq $0x00, %r8
  13417. # A[1] * B[9]
  13418. movq 72(%rcx), %rax
  13419. mulq 8(%rsi)
  13420. addq %rax, %r9
  13421. adcq %rdx, %r10
  13422. adcq $0x00, %r8
  13423. # A[2] * B[8]
  13424. movq 64(%rcx), %rax
  13425. mulq 16(%rsi)
  13426. addq %rax, %r9
  13427. adcq %rdx, %r10
  13428. adcq $0x00, %r8
  13429. # A[3] * B[7]
  13430. movq 56(%rcx), %rax
  13431. mulq 24(%rsi)
  13432. addq %rax, %r9
  13433. adcq %rdx, %r10
  13434. adcq $0x00, %r8
  13435. # A[4] * B[6]
  13436. movq 48(%rcx), %rax
  13437. mulq 32(%rsi)
  13438. addq %rax, %r9
  13439. adcq %rdx, %r10
  13440. adcq $0x00, %r8
  13441. # A[5] * B[5]
  13442. movq 40(%rcx), %rax
  13443. mulq 40(%rsi)
  13444. addq %rax, %r9
  13445. adcq %rdx, %r10
  13446. adcq $0x00, %r8
  13447. # A[6] * B[4]
  13448. movq 32(%rcx), %rax
  13449. mulq 48(%rsi)
  13450. addq %rax, %r9
  13451. adcq %rdx, %r10
  13452. adcq $0x00, %r8
  13453. # A[7] * B[3]
  13454. movq 24(%rcx), %rax
  13455. mulq 56(%rsi)
  13456. addq %rax, %r9
  13457. adcq %rdx, %r10
  13458. adcq $0x00, %r8
  13459. # A[8] * B[2]
  13460. movq 16(%rcx), %rax
  13461. mulq 64(%rsi)
  13462. addq %rax, %r9
  13463. adcq %rdx, %r10
  13464. adcq $0x00, %r8
  13465. # A[9] * B[1]
  13466. movq 8(%rcx), %rax
  13467. mulq 72(%rsi)
  13468. addq %rax, %r9
  13469. adcq %rdx, %r10
  13470. adcq $0x00, %r8
  13471. # A[10] * B[0]
  13472. movq (%rcx), %rax
  13473. mulq 80(%rsi)
  13474. addq %rax, %r9
  13475. adcq %rdx, %r10
  13476. adcq $0x00, %r8
  13477. movq %r9, 80(%rsp)
  13478. # A[0] * B[11]
  13479. movq 88(%rcx), %rax
  13480. mulq (%rsi)
  13481. xorq %r9, %r9
  13482. addq %rax, %r10
  13483. adcq %rdx, %r8
  13484. adcq $0x00, %r9
  13485. # A[1] * B[10]
  13486. movq 80(%rcx), %rax
  13487. mulq 8(%rsi)
  13488. addq %rax, %r10
  13489. adcq %rdx, %r8
  13490. adcq $0x00, %r9
  13491. # A[2] * B[9]
  13492. movq 72(%rcx), %rax
  13493. mulq 16(%rsi)
  13494. addq %rax, %r10
  13495. adcq %rdx, %r8
  13496. adcq $0x00, %r9
  13497. # A[3] * B[8]
  13498. movq 64(%rcx), %rax
  13499. mulq 24(%rsi)
  13500. addq %rax, %r10
  13501. adcq %rdx, %r8
  13502. adcq $0x00, %r9
  13503. # A[4] * B[7]
  13504. movq 56(%rcx), %rax
  13505. mulq 32(%rsi)
  13506. addq %rax, %r10
  13507. adcq %rdx, %r8
  13508. adcq $0x00, %r9
  13509. # A[5] * B[6]
  13510. movq 48(%rcx), %rax
  13511. mulq 40(%rsi)
  13512. addq %rax, %r10
  13513. adcq %rdx, %r8
  13514. adcq $0x00, %r9
  13515. # A[6] * B[5]
  13516. movq 40(%rcx), %rax
  13517. mulq 48(%rsi)
  13518. addq %rax, %r10
  13519. adcq %rdx, %r8
  13520. adcq $0x00, %r9
  13521. # A[7] * B[4]
  13522. movq 32(%rcx), %rax
  13523. mulq 56(%rsi)
  13524. addq %rax, %r10
  13525. adcq %rdx, %r8
  13526. adcq $0x00, %r9
  13527. # A[8] * B[3]
  13528. movq 24(%rcx), %rax
  13529. mulq 64(%rsi)
  13530. addq %rax, %r10
  13531. adcq %rdx, %r8
  13532. adcq $0x00, %r9
  13533. # A[9] * B[2]
  13534. movq 16(%rcx), %rax
  13535. mulq 72(%rsi)
  13536. addq %rax, %r10
  13537. adcq %rdx, %r8
  13538. adcq $0x00, %r9
  13539. # A[10] * B[1]
  13540. movq 8(%rcx), %rax
  13541. mulq 80(%rsi)
  13542. addq %rax, %r10
  13543. adcq %rdx, %r8
  13544. adcq $0x00, %r9
  13545. # A[11] * B[0]
  13546. movq (%rcx), %rax
  13547. mulq 88(%rsi)
  13548. addq %rax, %r10
  13549. adcq %rdx, %r8
  13550. adcq $0x00, %r9
  13551. movq %r10, 88(%rsp)
  13552. # A[1] * B[11]
  13553. movq 88(%rcx), %rax
  13554. mulq 8(%rsi)
  13555. xorq %r10, %r10
  13556. addq %rax, %r8
  13557. adcq %rdx, %r9
  13558. adcq $0x00, %r10
  13559. # A[2] * B[10]
  13560. movq 80(%rcx), %rax
  13561. mulq 16(%rsi)
  13562. addq %rax, %r8
  13563. adcq %rdx, %r9
  13564. adcq $0x00, %r10
  13565. # A[3] * B[9]
  13566. movq 72(%rcx), %rax
  13567. mulq 24(%rsi)
  13568. addq %rax, %r8
  13569. adcq %rdx, %r9
  13570. adcq $0x00, %r10
  13571. # A[4] * B[8]
  13572. movq 64(%rcx), %rax
  13573. mulq 32(%rsi)
  13574. addq %rax, %r8
  13575. adcq %rdx, %r9
  13576. adcq $0x00, %r10
  13577. # A[5] * B[7]
  13578. movq 56(%rcx), %rax
  13579. mulq 40(%rsi)
  13580. addq %rax, %r8
  13581. adcq %rdx, %r9
  13582. adcq $0x00, %r10
  13583. # A[6] * B[6]
  13584. movq 48(%rcx), %rax
  13585. mulq 48(%rsi)
  13586. addq %rax, %r8
  13587. adcq %rdx, %r9
  13588. adcq $0x00, %r10
  13589. # A[7] * B[5]
  13590. movq 40(%rcx), %rax
  13591. mulq 56(%rsi)
  13592. addq %rax, %r8
  13593. adcq %rdx, %r9
  13594. adcq $0x00, %r10
  13595. # A[8] * B[4]
  13596. movq 32(%rcx), %rax
  13597. mulq 64(%rsi)
  13598. addq %rax, %r8
  13599. adcq %rdx, %r9
  13600. adcq $0x00, %r10
  13601. # A[9] * B[3]
  13602. movq 24(%rcx), %rax
  13603. mulq 72(%rsi)
  13604. addq %rax, %r8
  13605. adcq %rdx, %r9
  13606. adcq $0x00, %r10
  13607. # A[10] * B[2]
  13608. movq 16(%rcx), %rax
  13609. mulq 80(%rsi)
  13610. addq %rax, %r8
  13611. adcq %rdx, %r9
  13612. adcq $0x00, %r10
  13613. # A[11] * B[1]
  13614. movq 8(%rcx), %rax
  13615. mulq 88(%rsi)
  13616. addq %rax, %r8
  13617. adcq %rdx, %r9
  13618. adcq $0x00, %r10
  13619. movq %r8, 96(%rdi)
  13620. # A[2] * B[11]
  13621. movq 88(%rcx), %rax
  13622. mulq 16(%rsi)
  13623. xorq %r8, %r8
  13624. addq %rax, %r9
  13625. adcq %rdx, %r10
  13626. adcq $0x00, %r8
  13627. # A[3] * B[10]
  13628. movq 80(%rcx), %rax
  13629. mulq 24(%rsi)
  13630. addq %rax, %r9
  13631. adcq %rdx, %r10
  13632. adcq $0x00, %r8
  13633. # A[4] * B[9]
  13634. movq 72(%rcx), %rax
  13635. mulq 32(%rsi)
  13636. addq %rax, %r9
  13637. adcq %rdx, %r10
  13638. adcq $0x00, %r8
  13639. # A[5] * B[8]
  13640. movq 64(%rcx), %rax
  13641. mulq 40(%rsi)
  13642. addq %rax, %r9
  13643. adcq %rdx, %r10
  13644. adcq $0x00, %r8
  13645. # A[6] * B[7]
  13646. movq 56(%rcx), %rax
  13647. mulq 48(%rsi)
  13648. addq %rax, %r9
  13649. adcq %rdx, %r10
  13650. adcq $0x00, %r8
  13651. # A[7] * B[6]
  13652. movq 48(%rcx), %rax
  13653. mulq 56(%rsi)
  13654. addq %rax, %r9
  13655. adcq %rdx, %r10
  13656. adcq $0x00, %r8
  13657. # A[8] * B[5]
  13658. movq 40(%rcx), %rax
  13659. mulq 64(%rsi)
  13660. addq %rax, %r9
  13661. adcq %rdx, %r10
  13662. adcq $0x00, %r8
  13663. # A[9] * B[4]
  13664. movq 32(%rcx), %rax
  13665. mulq 72(%rsi)
  13666. addq %rax, %r9
  13667. adcq %rdx, %r10
  13668. adcq $0x00, %r8
  13669. # A[10] * B[3]
  13670. movq 24(%rcx), %rax
  13671. mulq 80(%rsi)
  13672. addq %rax, %r9
  13673. adcq %rdx, %r10
  13674. adcq $0x00, %r8
  13675. # A[11] * B[2]
  13676. movq 16(%rcx), %rax
  13677. mulq 88(%rsi)
  13678. addq %rax, %r9
  13679. adcq %rdx, %r10
  13680. adcq $0x00, %r8
  13681. movq %r9, 104(%rdi)
  13682. # A[3] * B[11]
  13683. movq 88(%rcx), %rax
  13684. mulq 24(%rsi)
  13685. xorq %r9, %r9
  13686. addq %rax, %r10
  13687. adcq %rdx, %r8
  13688. adcq $0x00, %r9
  13689. # A[4] * B[10]
  13690. movq 80(%rcx), %rax
  13691. mulq 32(%rsi)
  13692. addq %rax, %r10
  13693. adcq %rdx, %r8
  13694. adcq $0x00, %r9
  13695. # A[5] * B[9]
  13696. movq 72(%rcx), %rax
  13697. mulq 40(%rsi)
  13698. addq %rax, %r10
  13699. adcq %rdx, %r8
  13700. adcq $0x00, %r9
  13701. # A[6] * B[8]
  13702. movq 64(%rcx), %rax
  13703. mulq 48(%rsi)
  13704. addq %rax, %r10
  13705. adcq %rdx, %r8
  13706. adcq $0x00, %r9
  13707. # A[7] * B[7]
  13708. movq 56(%rcx), %rax
  13709. mulq 56(%rsi)
  13710. addq %rax, %r10
  13711. adcq %rdx, %r8
  13712. adcq $0x00, %r9
  13713. # A[8] * B[6]
  13714. movq 48(%rcx), %rax
  13715. mulq 64(%rsi)
  13716. addq %rax, %r10
  13717. adcq %rdx, %r8
  13718. adcq $0x00, %r9
  13719. # A[9] * B[5]
  13720. movq 40(%rcx), %rax
  13721. mulq 72(%rsi)
  13722. addq %rax, %r10
  13723. adcq %rdx, %r8
  13724. adcq $0x00, %r9
  13725. # A[10] * B[4]
  13726. movq 32(%rcx), %rax
  13727. mulq 80(%rsi)
  13728. addq %rax, %r10
  13729. adcq %rdx, %r8
  13730. adcq $0x00, %r9
  13731. # A[11] * B[3]
  13732. movq 24(%rcx), %rax
  13733. mulq 88(%rsi)
  13734. addq %rax, %r10
  13735. adcq %rdx, %r8
  13736. adcq $0x00, %r9
  13737. movq %r10, 112(%rdi)
  13738. # A[4] * B[11]
  13739. movq 88(%rcx), %rax
  13740. mulq 32(%rsi)
  13741. xorq %r10, %r10
  13742. addq %rax, %r8
  13743. adcq %rdx, %r9
  13744. adcq $0x00, %r10
  13745. # A[5] * B[10]
  13746. movq 80(%rcx), %rax
  13747. mulq 40(%rsi)
  13748. addq %rax, %r8
  13749. adcq %rdx, %r9
  13750. adcq $0x00, %r10
  13751. # A[6] * B[9]
  13752. movq 72(%rcx), %rax
  13753. mulq 48(%rsi)
  13754. addq %rax, %r8
  13755. adcq %rdx, %r9
  13756. adcq $0x00, %r10
  13757. # A[7] * B[8]
  13758. movq 64(%rcx), %rax
  13759. mulq 56(%rsi)
  13760. addq %rax, %r8
  13761. adcq %rdx, %r9
  13762. adcq $0x00, %r10
  13763. # A[8] * B[7]
  13764. movq 56(%rcx), %rax
  13765. mulq 64(%rsi)
  13766. addq %rax, %r8
  13767. adcq %rdx, %r9
  13768. adcq $0x00, %r10
  13769. # A[9] * B[6]
  13770. movq 48(%rcx), %rax
  13771. mulq 72(%rsi)
  13772. addq %rax, %r8
  13773. adcq %rdx, %r9
  13774. adcq $0x00, %r10
  13775. # A[10] * B[5]
  13776. movq 40(%rcx), %rax
  13777. mulq 80(%rsi)
  13778. addq %rax, %r8
  13779. adcq %rdx, %r9
  13780. adcq $0x00, %r10
  13781. # A[11] * B[4]
  13782. movq 32(%rcx), %rax
  13783. mulq 88(%rsi)
  13784. addq %rax, %r8
  13785. adcq %rdx, %r9
  13786. adcq $0x00, %r10
  13787. movq %r8, 120(%rdi)
  13788. # A[5] * B[11]
  13789. movq 88(%rcx), %rax
  13790. mulq 40(%rsi)
  13791. xorq %r8, %r8
  13792. addq %rax, %r9
  13793. adcq %rdx, %r10
  13794. adcq $0x00, %r8
  13795. # A[6] * B[10]
  13796. movq 80(%rcx), %rax
  13797. mulq 48(%rsi)
  13798. addq %rax, %r9
  13799. adcq %rdx, %r10
  13800. adcq $0x00, %r8
  13801. # A[7] * B[9]
  13802. movq 72(%rcx), %rax
  13803. mulq 56(%rsi)
  13804. addq %rax, %r9
  13805. adcq %rdx, %r10
  13806. adcq $0x00, %r8
  13807. # A[8] * B[8]
  13808. movq 64(%rcx), %rax
  13809. mulq 64(%rsi)
  13810. addq %rax, %r9
  13811. adcq %rdx, %r10
  13812. adcq $0x00, %r8
  13813. # A[9] * B[7]
  13814. movq 56(%rcx), %rax
  13815. mulq 72(%rsi)
  13816. addq %rax, %r9
  13817. adcq %rdx, %r10
  13818. adcq $0x00, %r8
  13819. # A[10] * B[6]
  13820. movq 48(%rcx), %rax
  13821. mulq 80(%rsi)
  13822. addq %rax, %r9
  13823. adcq %rdx, %r10
  13824. adcq $0x00, %r8
  13825. # A[11] * B[5]
  13826. movq 40(%rcx), %rax
  13827. mulq 88(%rsi)
  13828. addq %rax, %r9
  13829. adcq %rdx, %r10
  13830. adcq $0x00, %r8
  13831. movq %r9, 128(%rdi)
  13832. # A[6] * B[11]
  13833. movq 88(%rcx), %rax
  13834. mulq 48(%rsi)
  13835. xorq %r9, %r9
  13836. addq %rax, %r10
  13837. adcq %rdx, %r8
  13838. adcq $0x00, %r9
  13839. # A[7] * B[10]
  13840. movq 80(%rcx), %rax
  13841. mulq 56(%rsi)
  13842. addq %rax, %r10
  13843. adcq %rdx, %r8
  13844. adcq $0x00, %r9
  13845. # A[8] * B[9]
  13846. movq 72(%rcx), %rax
  13847. mulq 64(%rsi)
  13848. addq %rax, %r10
  13849. adcq %rdx, %r8
  13850. adcq $0x00, %r9
  13851. # A[9] * B[8]
  13852. movq 64(%rcx), %rax
  13853. mulq 72(%rsi)
  13854. addq %rax, %r10
  13855. adcq %rdx, %r8
  13856. adcq $0x00, %r9
  13857. # A[10] * B[7]
  13858. movq 56(%rcx), %rax
  13859. mulq 80(%rsi)
  13860. addq %rax, %r10
  13861. adcq %rdx, %r8
  13862. adcq $0x00, %r9
  13863. # A[11] * B[6]
  13864. movq 48(%rcx), %rax
  13865. mulq 88(%rsi)
  13866. addq %rax, %r10
  13867. adcq %rdx, %r8
  13868. adcq $0x00, %r9
  13869. movq %r10, 136(%rdi)
  13870. # A[7] * B[11]
  13871. movq 88(%rcx), %rax
  13872. mulq 56(%rsi)
  13873. xorq %r10, %r10
  13874. addq %rax, %r8
  13875. adcq %rdx, %r9
  13876. adcq $0x00, %r10
  13877. # A[8] * B[10]
  13878. movq 80(%rcx), %rax
  13879. mulq 64(%rsi)
  13880. addq %rax, %r8
  13881. adcq %rdx, %r9
  13882. adcq $0x00, %r10
  13883. # A[9] * B[9]
  13884. movq 72(%rcx), %rax
  13885. mulq 72(%rsi)
  13886. addq %rax, %r8
  13887. adcq %rdx, %r9
  13888. adcq $0x00, %r10
  13889. # A[10] * B[8]
  13890. movq 64(%rcx), %rax
  13891. mulq 80(%rsi)
  13892. addq %rax, %r8
  13893. adcq %rdx, %r9
  13894. adcq $0x00, %r10
  13895. # A[11] * B[7]
  13896. movq 56(%rcx), %rax
  13897. mulq 88(%rsi)
  13898. addq %rax, %r8
  13899. adcq %rdx, %r9
  13900. adcq $0x00, %r10
  13901. movq %r8, 144(%rdi)
  13902. # A[8] * B[11]
  13903. movq 88(%rcx), %rax
  13904. mulq 64(%rsi)
  13905. xorq %r8, %r8
  13906. addq %rax, %r9
  13907. adcq %rdx, %r10
  13908. adcq $0x00, %r8
  13909. # A[9] * B[10]
  13910. movq 80(%rcx), %rax
  13911. mulq 72(%rsi)
  13912. addq %rax, %r9
  13913. adcq %rdx, %r10
  13914. adcq $0x00, %r8
  13915. # A[10] * B[9]
  13916. movq 72(%rcx), %rax
  13917. mulq 80(%rsi)
  13918. addq %rax, %r9
  13919. adcq %rdx, %r10
  13920. adcq $0x00, %r8
  13921. # A[11] * B[8]
  13922. movq 64(%rcx), %rax
  13923. mulq 88(%rsi)
  13924. addq %rax, %r9
  13925. adcq %rdx, %r10
  13926. adcq $0x00, %r8
  13927. movq %r9, 152(%rdi)
  13928. # A[9] * B[11]
  13929. movq 88(%rcx), %rax
  13930. mulq 72(%rsi)
  13931. xorq %r9, %r9
  13932. addq %rax, %r10
  13933. adcq %rdx, %r8
  13934. adcq $0x00, %r9
  13935. # A[10] * B[10]
  13936. movq 80(%rcx), %rax
  13937. mulq 80(%rsi)
  13938. addq %rax, %r10
  13939. adcq %rdx, %r8
  13940. adcq $0x00, %r9
  13941. # A[11] * B[9]
  13942. movq 72(%rcx), %rax
  13943. mulq 88(%rsi)
  13944. addq %rax, %r10
  13945. adcq %rdx, %r8
  13946. adcq $0x00, %r9
  13947. movq %r10, 160(%rdi)
  13948. # A[10] * B[11]
  13949. movq 88(%rcx), %rax
  13950. mulq 80(%rsi)
  13951. xorq %r10, %r10
  13952. addq %rax, %r8
  13953. adcq %rdx, %r9
  13954. adcq $0x00, %r10
  13955. # A[11] * B[10]
  13956. movq 80(%rcx), %rax
  13957. mulq 88(%rsi)
  13958. addq %rax, %r8
  13959. adcq %rdx, %r9
  13960. adcq $0x00, %r10
  13961. movq %r8, 168(%rdi)
  13962. # A[11] * B[11]
  13963. movq 88(%rcx), %rax
  13964. mulq 88(%rsi)
  13965. addq %rax, %r9
  13966. adcq %rdx, %r10
  13967. movq %r9, 176(%rdi)
  13968. movq %r10, 184(%rdi)
  13969. movq (%rsp), %rax
  13970. movq 8(%rsp), %rdx
  13971. movq 16(%rsp), %r8
  13972. movq 24(%rsp), %r9
  13973. movq %rax, (%rdi)
  13974. movq %rdx, 8(%rdi)
  13975. movq %r8, 16(%rdi)
  13976. movq %r9, 24(%rdi)
  13977. movq 32(%rsp), %rax
  13978. movq 40(%rsp), %rdx
  13979. movq 48(%rsp), %r8
  13980. movq 56(%rsp), %r9
  13981. movq %rax, 32(%rdi)
  13982. movq %rdx, 40(%rdi)
  13983. movq %r8, 48(%rdi)
  13984. movq %r9, 56(%rdi)
  13985. movq 64(%rsp), %rax
  13986. movq 72(%rsp), %rdx
  13987. movq 80(%rsp), %r8
  13988. movq 88(%rsp), %r9
  13989. movq %rax, 64(%rdi)
  13990. movq %rdx, 72(%rdi)
  13991. movq %r8, 80(%rdi)
  13992. movq %r9, 88(%rdi)
  13993. addq $0x60, %rsp
  13994. repz retq
  13995. #ifndef __APPLE__
  13996. .size sp_3072_mul_12,.-sp_3072_mul_12
  13997. #endif /* __APPLE__ */
  13998. /* Square a and put result in r. (r = a * a)
  13999. *
  14000. * r A single precision integer. Passed in %rdi; 24 64-bit words are written (byte offsets 0..184).
  14001. * a A single precision integer. Passed in %rsi; 12 64-bit words are read (byte offsets 0..88).
         *
         * Method: the product is built one 64-bit column at a time. Column k
         * sums every A[i] * A[j] with i + j == k and i <= j; a product with
         * i != j is counted twice and a square A[i] * A[i] once.
         * %r8, %r9 and %rcx rotate as a three-word running accumulator: the
         * lowest word is stored as result word k, then that register is
         * zeroed and becomes the top word for the next column.
         * Columns 1-4 and 18-21 add each cross product to the accumulator
         * twice. Columns 5-17 first sum the cross products in
         * %r12:%r11:%r10, double that sum, add the square term (even
         * columns only) and then add the total into the accumulator.
         *
         * Result words 0-11 are staged in a 0x60-byte stack buffer and copied
         * to r after the last multiply; words 12-23 are stored straight to r.
         * NOTE(review): presumably the staging is what allows r to be the same
         * buffer as a - confirm against the callers.
         *
         * Registers overwritten: %rax, %rdx, %rcx, %r8-%r11 and the flags.
         * %r12 is pushed on entry and popped before returning.
  14002. */
  14003. #ifndef __APPLE__
  14004. .text
  14005. .globl sp_3072_sqr_12
  14006. .type sp_3072_sqr_12,@function
  14007. .align 16
  14008. sp_3072_sqr_12:
  14009. #else
  14010. .section __TEXT,__text
  14011. .globl _sp_3072_sqr_12
  14012. .p2align 4
  14013. _sp_3072_sqr_12:
  14014. #endif /* __APPLE__ */
  14015. pushq %r12 # %r12 is used as scratch from column 5 on; restored before return
  14016. subq $0x60, %rsp # 96-byte staging buffer for result words 0-11
  14017. # Column 0: A[0] * A[0]
  14018. movq (%rsi), %rax
  14019. mulq %rax
  14020. xorq %r9, %r9
  14021. movq %rax, (%rsp)
  14022. movq %rdx, %r8
  14023. # Column 1: A[0] * A[1] (cross product, added to the accumulator twice)
  14024. movq 8(%rsi), %rax
  14025. mulq (%rsi)
  14026. xorq %rcx, %rcx
  14027. addq %rax, %r8
  14028. adcq %rdx, %r9
  14029. adcq $0x00, %rcx
  14030. addq %rax, %r8
  14031. adcq %rdx, %r9
  14032. adcq $0x00, %rcx
  14033. movq %r8, 8(%rsp)
  14034. # Column 2: A[0] * A[2]
  14035. movq 16(%rsi), %rax
  14036. mulq (%rsi)
  14037. xorq %r8, %r8
  14038. addq %rax, %r9
  14039. adcq %rdx, %rcx
  14040. adcq $0x00, %r8
  14041. addq %rax, %r9
  14042. adcq %rdx, %rcx
  14043. adcq $0x00, %r8
  14044. # A[1] * A[1] (square term, added once)
  14045. movq 8(%rsi), %rax
  14046. mulq %rax
  14047. addq %rax, %r9
  14048. adcq %rdx, %rcx
  14049. adcq $0x00, %r8
  14050. movq %r9, 16(%rsp)
  14051. # Column 3: A[0] * A[3]
  14052. movq 24(%rsi), %rax
  14053. mulq (%rsi)
  14054. xorq %r9, %r9
  14055. addq %rax, %rcx
  14056. adcq %rdx, %r8
  14057. adcq $0x00, %r9
  14058. addq %rax, %rcx
  14059. adcq %rdx, %r8
  14060. adcq $0x00, %r9
  14061. # A[1] * A[2]
  14062. movq 16(%rsi), %rax
  14063. mulq 8(%rsi)
  14064. addq %rax, %rcx
  14065. adcq %rdx, %r8
  14066. adcq $0x00, %r9
  14067. addq %rax, %rcx
  14068. adcq %rdx, %r8
  14069. adcq $0x00, %r9
  14070. movq %rcx, 24(%rsp)
  14071. # Column 4: A[0] * A[4]
  14072. movq 32(%rsi), %rax
  14073. mulq (%rsi)
  14074. xorq %rcx, %rcx
  14075. addq %rax, %r8
  14076. adcq %rdx, %r9
  14077. adcq $0x00, %rcx
  14078. addq %rax, %r8
  14079. adcq %rdx, %r9
  14080. adcq $0x00, %rcx
  14081. # A[1] * A[3]
  14082. movq 24(%rsi), %rax
  14083. mulq 8(%rsi)
  14084. addq %rax, %r8
  14085. adcq %rdx, %r9
  14086. adcq $0x00, %rcx
  14087. addq %rax, %r8
  14088. adcq %rdx, %r9
  14089. adcq $0x00, %rcx
  14090. # A[2] * A[2]
  14091. movq 16(%rsi), %rax
  14092. mulq %rax
  14093. addq %rax, %r8
  14094. adcq %rdx, %r9
  14095. adcq $0x00, %rcx
  14096. movq %r8, 32(%rsp)
  14097. # Column 5: A[0] * A[5] (from here the cross products are summed in %r12:%r11:%r10 and doubled once)
  14098. movq 40(%rsi), %rax
  14099. mulq (%rsi)
  14100. xorq %r8, %r8
  14101. xorq %r12, %r12
  14102. movq %rax, %r10
  14103. movq %rdx, %r11
  14104. # A[1] * A[4]
  14105. movq 32(%rsi), %rax
  14106. mulq 8(%rsi)
  14107. addq %rax, %r10
  14108. adcq %rdx, %r11
  14109. adcq $0x00, %r12
  14110. # A[2] * A[3]
  14111. movq 24(%rsi), %rax
  14112. mulq 16(%rsi)
  14113. addq %rax, %r10
  14114. adcq %rdx, %r11
  14115. adcq $0x00, %r12
  14116. addq %r10, %r10 # double the three-word cross-product sum
  14117. adcq %r11, %r11
  14118. adcq %r12, %r12
  14119. addq %r10, %r9 # add the doubled sum into the running accumulator
  14120. adcq %r11, %rcx
  14121. adcq %r12, %r8
  14122. movq %r9, 40(%rsp)
  14123. # Column 6: A[0] * A[6]
  14124. movq 48(%rsi), %rax
  14125. mulq (%rsi)
  14126. xorq %r9, %r9
  14127. xorq %r12, %r12
  14128. movq %rax, %r10
  14129. movq %rdx, %r11
  14130. # A[1] * A[5]
  14131. movq 40(%rsi), %rax
  14132. mulq 8(%rsi)
  14133. addq %rax, %r10
  14134. adcq %rdx, %r11
  14135. adcq $0x00, %r12
  14136. # A[2] * A[4]
  14137. movq 32(%rsi), %rax
  14138. mulq 16(%rsi)
  14139. addq %rax, %r10
  14140. adcq %rdx, %r11
  14141. adcq $0x00, %r12
  14142. # A[3] * A[3] (square term: computed first, added once after the cross-product sum is doubled)
  14143. movq 24(%rsi), %rax
  14144. mulq %rax
  14145. addq %r10, %r10
  14146. adcq %r11, %r11
  14147. adcq %r12, %r12
  14148. addq %rax, %r10
  14149. adcq %rdx, %r11
  14150. adcq $0x00, %r12
  14151. addq %r10, %rcx
  14152. adcq %r11, %r8
  14153. adcq %r12, %r9
  14154. movq %rcx, 48(%rsp)
  14155. # Column 7: A[0] * A[7]
  14156. movq 56(%rsi), %rax
  14157. mulq (%rsi)
  14158. xorq %rcx, %rcx
  14159. xorq %r12, %r12
  14160. movq %rax, %r10
  14161. movq %rdx, %r11
  14162. # A[1] * A[6]
  14163. movq 48(%rsi), %rax
  14164. mulq 8(%rsi)
  14165. addq %rax, %r10
  14166. adcq %rdx, %r11
  14167. adcq $0x00, %r12
  14168. # A[2] * A[5]
  14169. movq 40(%rsi), %rax
  14170. mulq 16(%rsi)
  14171. addq %rax, %r10
  14172. adcq %rdx, %r11
  14173. adcq $0x00, %r12
  14174. # A[3] * A[4]
  14175. movq 32(%rsi), %rax
  14176. mulq 24(%rsi)
  14177. addq %rax, %r10
  14178. adcq %rdx, %r11
  14179. adcq $0x00, %r12
  14180. addq %r10, %r10
  14181. adcq %r11, %r11
  14182. adcq %r12, %r12
  14183. addq %r10, %r8
  14184. adcq %r11, %r9
  14185. adcq %r12, %rcx
  14186. movq %r8, 56(%rsp)
  14187. # Column 8: A[0] * A[8]
  14188. movq 64(%rsi), %rax
  14189. mulq (%rsi)
  14190. xorq %r8, %r8
  14191. xorq %r12, %r12
  14192. movq %rax, %r10
  14193. movq %rdx, %r11
  14194. # A[1] * A[7]
  14195. movq 56(%rsi), %rax
  14196. mulq 8(%rsi)
  14197. addq %rax, %r10
  14198. adcq %rdx, %r11
  14199. adcq $0x00, %r12
  14200. # A[2] * A[6]
  14201. movq 48(%rsi), %rax
  14202. mulq 16(%rsi)
  14203. addq %rax, %r10
  14204. adcq %rdx, %r11
  14205. adcq $0x00, %r12
  14206. # A[3] * A[5]
  14207. movq 40(%rsi), %rax
  14208. mulq 24(%rsi)
  14209. addq %rax, %r10
  14210. adcq %rdx, %r11
  14211. adcq $0x00, %r12
  14212. # A[4] * A[4]
  14213. movq 32(%rsi), %rax
  14214. mulq %rax
  14215. addq %r10, %r10
  14216. adcq %r11, %r11
  14217. adcq %r12, %r12
  14218. addq %rax, %r10
  14219. adcq %rdx, %r11
  14220. adcq $0x00, %r12
  14221. addq %r10, %r9
  14222. adcq %r11, %rcx
  14223. adcq %r12, %r8
  14224. movq %r9, 64(%rsp)
  14225. # Column 9: A[0] * A[9]
  14226. movq 72(%rsi), %rax
  14227. mulq (%rsi)
  14228. xorq %r9, %r9
  14229. xorq %r12, %r12
  14230. movq %rax, %r10
  14231. movq %rdx, %r11
  14232. # A[1] * A[8]
  14233. movq 64(%rsi), %rax
  14234. mulq 8(%rsi)
  14235. addq %rax, %r10
  14236. adcq %rdx, %r11
  14237. adcq $0x00, %r12
  14238. # A[2] * A[7]
  14239. movq 56(%rsi), %rax
  14240. mulq 16(%rsi)
  14241. addq %rax, %r10
  14242. adcq %rdx, %r11
  14243. adcq $0x00, %r12
  14244. # A[3] * A[6]
  14245. movq 48(%rsi), %rax
  14246. mulq 24(%rsi)
  14247. addq %rax, %r10
  14248. adcq %rdx, %r11
  14249. adcq $0x00, %r12
  14250. # A[4] * A[5]
  14251. movq 40(%rsi), %rax
  14252. mulq 32(%rsi)
  14253. addq %rax, %r10
  14254. adcq %rdx, %r11
  14255. adcq $0x00, %r12
  14256. addq %r10, %r10
  14257. adcq %r11, %r11
  14258. adcq %r12, %r12
  14259. addq %r10, %rcx
  14260. adcq %r11, %r8
  14261. adcq %r12, %r9
  14262. movq %rcx, 72(%rsp)
  14263. # Column 10: A[0] * A[10]
  14264. movq 80(%rsi), %rax
  14265. mulq (%rsi)
  14266. xorq %rcx, %rcx
  14267. xorq %r12, %r12
  14268. movq %rax, %r10
  14269. movq %rdx, %r11
  14270. # A[1] * A[9]
  14271. movq 72(%rsi), %rax
  14272. mulq 8(%rsi)
  14273. addq %rax, %r10
  14274. adcq %rdx, %r11
  14275. adcq $0x00, %r12
  14276. # A[2] * A[8]
  14277. movq 64(%rsi), %rax
  14278. mulq 16(%rsi)
  14279. addq %rax, %r10
  14280. adcq %rdx, %r11
  14281. adcq $0x00, %r12
  14282. # A[3] * A[7]
  14283. movq 56(%rsi), %rax
  14284. mulq 24(%rsi)
  14285. addq %rax, %r10
  14286. adcq %rdx, %r11
  14287. adcq $0x00, %r12
  14288. # A[4] * A[6]
  14289. movq 48(%rsi), %rax
  14290. mulq 32(%rsi)
  14291. addq %rax, %r10
  14292. adcq %rdx, %r11
  14293. adcq $0x00, %r12
  14294. # A[5] * A[5]
  14295. movq 40(%rsi), %rax
  14296. mulq %rax
  14297. addq %r10, %r10
  14298. adcq %r11, %r11
  14299. adcq %r12, %r12
  14300. addq %rax, %r10
  14301. adcq %rdx, %r11
  14302. adcq $0x00, %r12
  14303. addq %r10, %r8
  14304. adcq %r11, %r9
  14305. adcq %r12, %rcx
  14306. movq %r8, 80(%rsp)
  14307. # Column 11: A[0] * A[11]
  14308. movq 88(%rsi), %rax
  14309. mulq (%rsi)
  14310. xorq %r8, %r8
  14311. xorq %r12, %r12
  14312. movq %rax, %r10
  14313. movq %rdx, %r11
  14314. # A[1] * A[10]
  14315. movq 80(%rsi), %rax
  14316. mulq 8(%rsi)
  14317. addq %rax, %r10
  14318. adcq %rdx, %r11
  14319. adcq $0x00, %r12
  14320. # A[2] * A[9]
  14321. movq 72(%rsi), %rax
  14322. mulq 16(%rsi)
  14323. addq %rax, %r10
  14324. adcq %rdx, %r11
  14325. adcq $0x00, %r12
  14326. # A[3] * A[8]
  14327. movq 64(%rsi), %rax
  14328. mulq 24(%rsi)
  14329. addq %rax, %r10
  14330. adcq %rdx, %r11
  14331. adcq $0x00, %r12
  14332. # A[4] * A[7]
  14333. movq 56(%rsi), %rax
  14334. mulq 32(%rsi)
  14335. addq %rax, %r10
  14336. adcq %rdx, %r11
  14337. adcq $0x00, %r12
  14338. # A[5] * A[6]
  14339. movq 48(%rsi), %rax
  14340. mulq 40(%rsi)
  14341. addq %rax, %r10
  14342. adcq %rdx, %r11
  14343. adcq $0x00, %r12
  14344. addq %r10, %r10
  14345. adcq %r11, %r11
  14346. adcq %r12, %r12
  14347. addq %r10, %r9
  14348. adcq %r11, %rcx
  14349. adcq %r12, %r8
  14350. movq %r9, 88(%rsp)
  14351. # Column 12: A[1] * A[11]
  14352. movq 88(%rsi), %rax
  14353. mulq 8(%rsi)
  14354. xorq %r9, %r9
  14355. xorq %r12, %r12
  14356. movq %rax, %r10
  14357. movq %rdx, %r11
  14358. # A[2] * A[10]
  14359. movq 80(%rsi), %rax
  14360. mulq 16(%rsi)
  14361. addq %rax, %r10
  14362. adcq %rdx, %r11
  14363. adcq $0x00, %r12
  14364. # A[3] * A[9]
  14365. movq 72(%rsi), %rax
  14366. mulq 24(%rsi)
  14367. addq %rax, %r10
  14368. adcq %rdx, %r11
  14369. adcq $0x00, %r12
  14370. # A[4] * A[8]
  14371. movq 64(%rsi), %rax
  14372. mulq 32(%rsi)
  14373. addq %rax, %r10
  14374. adcq %rdx, %r11
  14375. adcq $0x00, %r12
  14376. # A[5] * A[7]
  14377. movq 56(%rsi), %rax
  14378. mulq 40(%rsi)
  14379. addq %rax, %r10
  14380. adcq %rdx, %r11
  14381. adcq $0x00, %r12
  14382. # A[6] * A[6]
  14383. movq 48(%rsi), %rax
  14384. mulq %rax
  14385. addq %r10, %r10
  14386. adcq %r11, %r11
  14387. adcq %r12, %r12
  14388. addq %rax, %r10
  14389. adcq %rdx, %r11
  14390. adcq $0x00, %r12
  14391. addq %r10, %rcx
  14392. adcq %r11, %r8
  14393. adcq %r12, %r9
  14394. movq %rcx, 96(%rdi) # result words 12 and up are stored straight to r, not to the stack
  14395. # Column 13: A[2] * A[11]
  14396. movq 88(%rsi), %rax
  14397. mulq 16(%rsi)
  14398. xorq %rcx, %rcx
  14399. xorq %r12, %r12
  14400. movq %rax, %r10
  14401. movq %rdx, %r11
  14402. # A[3] * A[10]
  14403. movq 80(%rsi), %rax
  14404. mulq 24(%rsi)
  14405. addq %rax, %r10
  14406. adcq %rdx, %r11
  14407. adcq $0x00, %r12
  14408. # A[4] * A[9]
  14409. movq 72(%rsi), %rax
  14410. mulq 32(%rsi)
  14411. addq %rax, %r10
  14412. adcq %rdx, %r11
  14413. adcq $0x00, %r12
  14414. # A[5] * A[8]
  14415. movq 64(%rsi), %rax
  14416. mulq 40(%rsi)
  14417. addq %rax, %r10
  14418. adcq %rdx, %r11
  14419. adcq $0x00, %r12
  14420. # A[6] * A[7]
  14421. movq 56(%rsi), %rax
  14422. mulq 48(%rsi)
  14423. addq %rax, %r10
  14424. adcq %rdx, %r11
  14425. adcq $0x00, %r12
  14426. addq %r10, %r10
  14427. adcq %r11, %r11
  14428. adcq %r12, %r12
  14429. addq %r10, %r8
  14430. adcq %r11, %r9
  14431. adcq %r12, %rcx
  14432. movq %r8, 104(%rdi)
  14433. # Column 14: A[3] * A[11]
  14434. movq 88(%rsi), %rax
  14435. mulq 24(%rsi)
  14436. xorq %r8, %r8
  14437. xorq %r12, %r12
  14438. movq %rax, %r10
  14439. movq %rdx, %r11
  14440. # A[4] * A[10]
  14441. movq 80(%rsi), %rax
  14442. mulq 32(%rsi)
  14443. addq %rax, %r10
  14444. adcq %rdx, %r11
  14445. adcq $0x00, %r12
  14446. # A[5] * A[9]
  14447. movq 72(%rsi), %rax
  14448. mulq 40(%rsi)
  14449. addq %rax, %r10
  14450. adcq %rdx, %r11
  14451. adcq $0x00, %r12
  14452. # A[6] * A[8]
  14453. movq 64(%rsi), %rax
  14454. mulq 48(%rsi)
  14455. addq %rax, %r10
  14456. adcq %rdx, %r11
  14457. adcq $0x00, %r12
  14458. # A[7] * A[7]
  14459. movq 56(%rsi), %rax
  14460. mulq %rax
  14461. addq %r10, %r10
  14462. adcq %r11, %r11
  14463. adcq %r12, %r12
  14464. addq %rax, %r10
  14465. adcq %rdx, %r11
  14466. adcq $0x00, %r12
  14467. addq %r10, %r9
  14468. adcq %r11, %rcx
  14469. adcq %r12, %r8
  14470. movq %r9, 112(%rdi)
  14471. # Column 15: A[4] * A[11]
  14472. movq 88(%rsi), %rax
  14473. mulq 32(%rsi)
  14474. xorq %r9, %r9
  14475. xorq %r12, %r12
  14476. movq %rax, %r10
  14477. movq %rdx, %r11
  14478. # A[5] * A[10]
  14479. movq 80(%rsi), %rax
  14480. mulq 40(%rsi)
  14481. addq %rax, %r10
  14482. adcq %rdx, %r11
  14483. adcq $0x00, %r12
  14484. # A[6] * A[9]
  14485. movq 72(%rsi), %rax
  14486. mulq 48(%rsi)
  14487. addq %rax, %r10
  14488. adcq %rdx, %r11
  14489. adcq $0x00, %r12
  14490. # A[7] * A[8]
  14491. movq 64(%rsi), %rax
  14492. mulq 56(%rsi)
  14493. addq %rax, %r10
  14494. adcq %rdx, %r11
  14495. adcq $0x00, %r12
  14496. addq %r10, %r10
  14497. adcq %r11, %r11
  14498. adcq %r12, %r12
  14499. addq %r10, %rcx
  14500. adcq %r11, %r8
  14501. adcq %r12, %r9
  14502. movq %rcx, 120(%rdi)
  14503. # Column 16: A[5] * A[11]
  14504. movq 88(%rsi), %rax
  14505. mulq 40(%rsi)
  14506. xorq %rcx, %rcx
  14507. xorq %r12, %r12
  14508. movq %rax, %r10
  14509. movq %rdx, %r11
  14510. # A[6] * A[10]
  14511. movq 80(%rsi), %rax
  14512. mulq 48(%rsi)
  14513. addq %rax, %r10
  14514. adcq %rdx, %r11
  14515. adcq $0x00, %r12
  14516. # A[7] * A[9]
  14517. movq 72(%rsi), %rax
  14518. mulq 56(%rsi)
  14519. addq %rax, %r10
  14520. adcq %rdx, %r11
  14521. adcq $0x00, %r12
  14522. # A[8] * A[8]
  14523. movq 64(%rsi), %rax
  14524. mulq %rax
  14525. addq %r10, %r10
  14526. adcq %r11, %r11
  14527. adcq %r12, %r12
  14528. addq %rax, %r10
  14529. adcq %rdx, %r11
  14530. adcq $0x00, %r12
  14531. addq %r10, %r8
  14532. adcq %r11, %r9
  14533. adcq %r12, %rcx
  14534. movq %r8, 128(%rdi)
  14535. # Column 17: A[6] * A[11]
  14536. movq 88(%rsi), %rax
  14537. mulq 48(%rsi)
  14538. xorq %r8, %r8
  14539. xorq %r12, %r12
  14540. movq %rax, %r10
  14541. movq %rdx, %r11
  14542. # A[7] * A[10]
  14543. movq 80(%rsi), %rax
  14544. mulq 56(%rsi)
  14545. addq %rax, %r10
  14546. adcq %rdx, %r11
  14547. adcq $0x00, %r12
  14548. # A[8] * A[9]
  14549. movq 72(%rsi), %rax
  14550. mulq 64(%rsi)
  14551. addq %rax, %r10
  14552. adcq %rdx, %r11
  14553. adcq $0x00, %r12
  14554. addq %r10, %r10
  14555. adcq %r11, %r11
  14556. adcq %r12, %r12
  14557. addq %r10, %r9
  14558. adcq %r11, %rcx
  14559. adcq %r12, %r8
  14560. movq %r9, 136(%rdi)
  14561. # Column 18: A[7] * A[11] (from here each cross product is again added to the accumulator twice)
  14562. movq 88(%rsi), %rax
  14563. mulq 56(%rsi)
  14564. xorq %r9, %r9
  14565. addq %rax, %rcx
  14566. adcq %rdx, %r8
  14567. adcq $0x00, %r9
  14568. addq %rax, %rcx
  14569. adcq %rdx, %r8
  14570. adcq $0x00, %r9
  14571. # A[8] * A[10]
  14572. movq 80(%rsi), %rax
  14573. mulq 64(%rsi)
  14574. addq %rax, %rcx
  14575. adcq %rdx, %r8
  14576. adcq $0x00, %r9
  14577. addq %rax, %rcx
  14578. adcq %rdx, %r8
  14579. adcq $0x00, %r9
  14580. # A[9] * A[9]
  14581. movq 72(%rsi), %rax
  14582. mulq %rax
  14583. addq %rax, %rcx
  14584. adcq %rdx, %r8
  14585. adcq $0x00, %r9
  14586. movq %rcx, 144(%rdi)
  14587. # Column 19: A[8] * A[11]
  14588. movq 88(%rsi), %rax
  14589. mulq 64(%rsi)
  14590. xorq %rcx, %rcx
  14591. addq %rax, %r8
  14592. adcq %rdx, %r9
  14593. adcq $0x00, %rcx
  14594. addq %rax, %r8
  14595. adcq %rdx, %r9
  14596. adcq $0x00, %rcx
  14597. # A[9] * A[10]
  14598. movq 80(%rsi), %rax
  14599. mulq 72(%rsi)
  14600. addq %rax, %r8
  14601. adcq %rdx, %r9
  14602. adcq $0x00, %rcx
  14603. addq %rax, %r8
  14604. adcq %rdx, %r9
  14605. adcq $0x00, %rcx
  14606. movq %r8, 152(%rdi)
  14607. # Column 20: A[9] * A[11]
  14608. movq 88(%rsi), %rax
  14609. mulq 72(%rsi)
  14610. xorq %r8, %r8
  14611. addq %rax, %r9
  14612. adcq %rdx, %rcx
  14613. adcq $0x00, %r8
  14614. addq %rax, %r9
  14615. adcq %rdx, %rcx
  14616. adcq $0x00, %r8
  14617. # A[10] * A[10]
  14618. movq 80(%rsi), %rax
  14619. mulq %rax
  14620. addq %rax, %r9
  14621. adcq %rdx, %rcx
  14622. adcq $0x00, %r8
  14623. movq %r9, 160(%rdi)
  14624. # Column 21: A[10] * A[11]
  14625. movq 88(%rsi), %rax
  14626. mulq 80(%rsi)
  14627. xorq %r9, %r9
  14628. addq %rax, %rcx
  14629. adcq %rdx, %r8
  14630. adcq $0x00, %r9
  14631. addq %rax, %rcx
  14632. adcq %rdx, %r8
  14633. adcq $0x00, %r9
  14634. movq %rcx, 168(%rdi)
  14635. # Column 22: A[11] * A[11] (the remaining two accumulator words become result words 22 and 23)
  14636. movq 88(%rsi), %rax
  14637. mulq %rax
  14638. addq %rax, %r8
  14639. adcq %rdx, %r9
  14640. movq %r8, 176(%rdi)
  14641. movq %r9, 184(%rdi)
  14642. movq (%rsp), %rax # all reads of a are done: copy staged words 0-11 from the stack to r, four at a time
  14643. movq 8(%rsp), %rdx
  14644. movq 16(%rsp), %r10
  14645. movq 24(%rsp), %r11
  14646. movq %rax, (%rdi)
  14647. movq %rdx, 8(%rdi)
  14648. movq %r10, 16(%rdi)
  14649. movq %r11, 24(%rdi)
  14650. movq 32(%rsp), %rax
  14651. movq 40(%rsp), %rdx
  14652. movq 48(%rsp), %r10
  14653. movq 56(%rsp), %r11
  14654. movq %rax, 32(%rdi)
  14655. movq %rdx, 40(%rdi)
  14656. movq %r10, 48(%rdi)
  14657. movq %r11, 56(%rdi)
  14658. movq 64(%rsp), %rax
  14659. movq 72(%rsp), %rdx
  14660. movq 80(%rsp), %r10
  14661. movq 88(%rsp), %r11
  14662. movq %rax, 64(%rdi)
  14663. movq %rdx, 72(%rdi)
  14664. movq %r10, 80(%rdi)
  14665. movq %r11, 88(%rdi)
  14666. addq $0x60, %rsp # release the staging buffer
  14667. popq %r12
  14668. repz retq # NOTE(review): rep-prefixed return, presumably the usual branch-predictor workaround - confirm
  14669. #ifndef __APPLE__
  14670. .size sp_3072_sqr_12,.-sp_3072_sqr_12
  14671. #endif /* __APPLE__ */
  14672. #ifdef HAVE_INTEL_AVX2
  14673. /* Multiply a and b into r. (r = a * b)
  14674. *
  14675. * r Result of multiplication.
  14676. * a First number to multiply.
  14677. * b Second number to multiply.
  14678. */
  14679. #ifndef __APPLE__
  14680. .text
  14681. .globl sp_3072_mul_avx2_12
  14682. .type sp_3072_mul_avx2_12,@function
  14683. .align 16
  14684. sp_3072_mul_avx2_12:
  14685. #else
  14686. .section __TEXT,__text
  14687. .globl _sp_3072_mul_avx2_12
  14688. .p2align 4
  14689. _sp_3072_mul_avx2_12:
  14690. #endif /* __APPLE__ */
  14691. pushq %rbx
  14692. pushq %rbp
  14693. pushq %r12
  14694. movq %rdx, %rbp
  14695. subq $0x60, %rsp
  14696. cmpq %rdi, %rsi
  14697. movq %rsp, %rbx
  14698. cmovne %rdi, %rbx
  14699. cmpq %rdi, %rbp
  14700. cmove %rsp, %rbx
  14701. xorq %r12, %r12
  14702. movq (%rsi), %rdx
  14703. # A[0] * B[0]
  14704. mulx (%rbp), %r8, %r9
  14705. # A[0] * B[1]
  14706. mulx 8(%rbp), %rax, %r10
  14707. movq %r8, (%rbx)
  14708. adcxq %rax, %r9
  14709. movq %r9, 8(%rbx)
  14710. # A[0] * B[2]
  14711. mulx 16(%rbp), %rax, %r8
  14712. adcxq %rax, %r10
  14713. # A[0] * B[3]
  14714. mulx 24(%rbp), %rax, %r9
  14715. movq %r10, 16(%rbx)
  14716. adcxq %rax, %r8
  14717. movq %r8, 24(%rbx)
  14718. # A[0] * B[4]
  14719. mulx 32(%rbp), %rax, %r10
  14720. adcxq %rax, %r9
  14721. # A[0] * B[5]
  14722. mulx 40(%rbp), %rax, %r8
  14723. movq %r9, 32(%rbx)
  14724. adcxq %rax, %r10
  14725. movq %r10, 40(%rbx)
  14726. # A[0] * B[6]
  14727. mulx 48(%rbp), %rax, %r9
  14728. adcxq %rax, %r8
  14729. # A[0] * B[7]
  14730. mulx 56(%rbp), %rax, %r10
  14731. movq %r8, 48(%rbx)
  14732. adcxq %rax, %r9
  14733. movq %r9, 56(%rbx)
  14734. # A[0] * B[8]
  14735. mulx 64(%rbp), %rax, %r8
  14736. adcxq %rax, %r10
  14737. # A[0] * B[9]
  14738. mulx 72(%rbp), %rax, %r9
  14739. movq %r10, 64(%rbx)
  14740. adcxq %rax, %r8
  14741. movq %r8, 72(%rbx)
  14742. # A[0] * B[10]
  14743. mulx 80(%rbp), %rax, %r10
  14744. adcxq %rax, %r9
  14745. # A[0] * B[11]
  14746. mulx 88(%rbp), %rax, %r8
  14747. movq %r9, 80(%rbx)
  14748. adcxq %rax, %r10
  14749. adcxq %r12, %r8
  14750. movq %r12, %r11
  14751. adcxq %r12, %r11
  14752. movq %r10, 88(%rbx)
  14753. movq %r8, 96(%rdi)
  14754. movq 8(%rsi), %rdx
  14755. movq 8(%rbx), %r9
  14756. movq 16(%rbx), %r10
  14757. movq 24(%rbx), %r8
  14758. # A[1] * B[0]
  14759. mulx (%rbp), %rax, %rcx
  14760. adcxq %rax, %r9
  14761. adoxq %rcx, %r10
  14762. # A[1] * B[1]
  14763. mulx 8(%rbp), %rax, %rcx
  14764. movq %r9, 8(%rbx)
  14765. adcxq %rax, %r10
  14766. adoxq %rcx, %r8
  14767. movq %r10, 16(%rbx)
  14768. movq 32(%rbx), %r9
  14769. movq 40(%rbx), %r10
  14770. # A[1] * B[2]
  14771. mulx 16(%rbp), %rax, %rcx
  14772. adcxq %rax, %r8
  14773. adoxq %rcx, %r9
  14774. # A[1] * B[3]
  14775. mulx 24(%rbp), %rax, %rcx
  14776. movq %r8, 24(%rbx)
  14777. adcxq %rax, %r9
  14778. adoxq %rcx, %r10
  14779. movq %r9, 32(%rbx)
  14780. movq 48(%rbx), %r8
  14781. movq 56(%rbx), %r9
  14782. # A[1] * B[4]
  14783. mulx 32(%rbp), %rax, %rcx
  14784. adcxq %rax, %r10
  14785. adoxq %rcx, %r8
  14786. # A[1] * B[5]
  14787. mulx 40(%rbp), %rax, %rcx
  14788. movq %r10, 40(%rbx)
  14789. adcxq %rax, %r8
  14790. adoxq %rcx, %r9
  14791. movq %r8, 48(%rbx)
  14792. movq 64(%rbx), %r10
  14793. movq 72(%rbx), %r8
  14794. # A[1] * B[6]
  14795. mulx 48(%rbp), %rax, %rcx
  14796. adcxq %rax, %r9
  14797. adoxq %rcx, %r10
  14798. # A[1] * B[7]
  14799. mulx 56(%rbp), %rax, %rcx
  14800. movq %r9, 56(%rbx)
  14801. adcxq %rax, %r10
  14802. adoxq %rcx, %r8
  14803. movq %r10, 64(%rbx)
  14804. movq 80(%rbx), %r9
  14805. movq 88(%rbx), %r10
  14806. # A[1] * B[8]
  14807. mulx 64(%rbp), %rax, %rcx
  14808. adcxq %rax, %r8
  14809. adoxq %rcx, %r9
  14810. # A[1] * B[9]
  14811. mulx 72(%rbp), %rax, %rcx
  14812. movq %r8, 72(%rbx)
  14813. adcxq %rax, %r9
  14814. adoxq %rcx, %r10
  14815. movq %r9, 80(%rbx)
  14816. movq 96(%rdi), %r8
  14817. # A[1] * B[10]
  14818. mulx 80(%rbp), %rax, %rcx
  14819. adcxq %rax, %r10
  14820. adoxq %rcx, %r8
  14821. # A[1] * B[11]
  14822. mulx 88(%rbp), %rax, %rcx
  14823. movq %r10, 88(%rbx)
  14824. movq %r12, %r9
  14825. adcxq %rax, %r8
  14826. adoxq %rcx, %r9
  14827. adcxq %r11, %r9
  14828. movq %r12, %r11
  14829. adoxq %r12, %r11
  14830. adcxq %r12, %r11
  14831. movq %r8, 96(%rdi)
  14832. movq %r9, 104(%rdi)
  14833. movq 16(%rsi), %rdx
  14834. movq 16(%rbx), %r10
  14835. movq 24(%rbx), %r8
  14836. movq 32(%rbx), %r9
  14837. # A[2] * B[0]
  14838. mulx (%rbp), %rax, %rcx
  14839. adcxq %rax, %r10
  14840. adoxq %rcx, %r8
  14841. # A[2] * B[1]
  14842. mulx 8(%rbp), %rax, %rcx
  14843. movq %r10, 16(%rbx)
  14844. adcxq %rax, %r8
  14845. adoxq %rcx, %r9
  14846. movq %r8, 24(%rbx)
  14847. movq 40(%rbx), %r10
  14848. movq 48(%rbx), %r8
  14849. # A[2] * B[2]
  14850. mulx 16(%rbp), %rax, %rcx
  14851. adcxq %rax, %r9
  14852. adoxq %rcx, %r10
  14853. # A[2] * B[3]
  14854. mulx 24(%rbp), %rax, %rcx
  14855. movq %r9, 32(%rbx)
  14856. adcxq %rax, %r10
  14857. adoxq %rcx, %r8
  14858. movq %r10, 40(%rbx)
  14859. movq 56(%rbx), %r9
  14860. movq 64(%rbx), %r10
  14861. # A[2] * B[4]
  14862. mulx 32(%rbp), %rax, %rcx
  14863. adcxq %rax, %r8
  14864. adoxq %rcx, %r9
  14865. # A[2] * B[5]
  14866. mulx 40(%rbp), %rax, %rcx
  14867. movq %r8, 48(%rbx)
  14868. adcxq %rax, %r9
  14869. adoxq %rcx, %r10
  14870. movq %r9, 56(%rbx)
  14871. movq 72(%rbx), %r8
  14872. movq 80(%rbx), %r9
  14873. # A[2] * B[6]
  14874. mulx 48(%rbp), %rax, %rcx
  14875. adcxq %rax, %r10
  14876. adoxq %rcx, %r8
  14877. # A[2] * B[7]
  14878. mulx 56(%rbp), %rax, %rcx
  14879. movq %r10, 64(%rbx)
  14880. adcxq %rax, %r8
  14881. adoxq %rcx, %r9
  14882. movq %r8, 72(%rbx)
  14883. movq 88(%rbx), %r10
  14884. movq 96(%rdi), %r8
  14885. # A[2] * B[8]
  14886. mulx 64(%rbp), %rax, %rcx
  14887. adcxq %rax, %r9
  14888. adoxq %rcx, %r10
  14889. # A[2] * B[9]
  14890. mulx 72(%rbp), %rax, %rcx
  14891. movq %r9, 80(%rbx)
  14892. adcxq %rax, %r10
  14893. adoxq %rcx, %r8
  14894. movq %r10, 88(%rbx)
  14895. movq 104(%rdi), %r9
  14896. # A[2] * B[10]
  14897. mulx 80(%rbp), %rax, %rcx
  14898. adcxq %rax, %r8
  14899. adoxq %rcx, %r9
  14900. # A[2] * B[11]
  14901. mulx 88(%rbp), %rax, %rcx
  14902. movq %r8, 96(%rdi)
  14903. movq %r12, %r10
  14904. adcxq %rax, %r9
  14905. adoxq %rcx, %r10
  14906. adcxq %r11, %r10
  14907. movq %r12, %r11
  14908. adoxq %r12, %r11
  14909. adcxq %r12, %r11
  14910. movq %r9, 104(%rdi)
  14911. movq %r10, 112(%rdi)
  14912. movq 24(%rsi), %rdx
  14913. movq 24(%rbx), %r8
  14914. movq 32(%rbx), %r9
  14915. movq 40(%rbx), %r10
  14916. # A[3] * B[0]
  14917. mulx (%rbp), %rax, %rcx
  14918. adcxq %rax, %r8
  14919. adoxq %rcx, %r9
  14920. # A[3] * B[1]
  14921. mulx 8(%rbp), %rax, %rcx
  14922. movq %r8, 24(%rbx)
  14923. adcxq %rax, %r9
  14924. adoxq %rcx, %r10
  14925. movq %r9, 32(%rbx)
  14926. movq 48(%rbx), %r8
  14927. movq 56(%rbx), %r9
  14928. # A[3] * B[2]
  14929. mulx 16(%rbp), %rax, %rcx
  14930. adcxq %rax, %r10
  14931. adoxq %rcx, %r8
  14932. # A[3] * B[3]
  14933. mulx 24(%rbp), %rax, %rcx
  14934. movq %r10, 40(%rbx)
  14935. adcxq %rax, %r8
  14936. adoxq %rcx, %r9
  14937. movq %r8, 48(%rbx)
  14938. movq 64(%rbx), %r10
  14939. movq 72(%rbx), %r8
  14940. # A[3] * B[4]
  14941. mulx 32(%rbp), %rax, %rcx
  14942. adcxq %rax, %r9
  14943. adoxq %rcx, %r10
  14944. # A[3] * B[5]
  14945. mulx 40(%rbp), %rax, %rcx
  14946. movq %r9, 56(%rbx)
  14947. adcxq %rax, %r10
  14948. adoxq %rcx, %r8
  14949. movq %r10, 64(%rbx)
  14950. movq 80(%rbx), %r9
  14951. movq 88(%rbx), %r10
  14952. # A[3] * B[6]
  14953. mulx 48(%rbp), %rax, %rcx
  14954. adcxq %rax, %r8
  14955. adoxq %rcx, %r9
  14956. # A[3] * B[7]
  14957. mulx 56(%rbp), %rax, %rcx
  14958. movq %r8, 72(%rbx)
  14959. adcxq %rax, %r9
  14960. adoxq %rcx, %r10
  14961. movq %r9, 80(%rbx)
  14962. movq 96(%rdi), %r8
  14963. movq 104(%rdi), %r9
  14964. # A[3] * B[8]
  14965. mulx 64(%rbp), %rax, %rcx
  14966. adcxq %rax, %r10
  14967. adoxq %rcx, %r8
  14968. # A[3] * B[9]
  14969. mulx 72(%rbp), %rax, %rcx
  14970. movq %r10, 88(%rbx)
  14971. adcxq %rax, %r8
  14972. adoxq %rcx, %r9
  14973. movq %r8, 96(%rdi)
  14974. movq 112(%rdi), %r10
  14975. # A[3] * B[10]
  14976. mulx 80(%rbp), %rax, %rcx
  14977. adcxq %rax, %r9
  14978. adoxq %rcx, %r10
  14979. # A[3] * B[11]
  14980. mulx 88(%rbp), %rax, %rcx
  14981. movq %r9, 104(%rdi)
  14982. movq %r12, %r8
  14983. adcxq %rax, %r10
  14984. adoxq %rcx, %r8
  14985. adcxq %r11, %r8
  14986. movq %r12, %r11
  14987. adoxq %r12, %r11
  14988. adcxq %r12, %r11
  14989. movq %r10, 112(%rdi)
  14990. movq %r8, 120(%rdi)
  14991. movq 32(%rsi), %rdx
  14992. movq 32(%rbx), %r9
  14993. movq 40(%rbx), %r10
  14994. movq 48(%rbx), %r8
  14995. # A[4] * B[0]
  14996. mulx (%rbp), %rax, %rcx
  14997. adcxq %rax, %r9
  14998. adoxq %rcx, %r10
  14999. # A[4] * B[1]
  15000. mulx 8(%rbp), %rax, %rcx
  15001. movq %r9, 32(%rbx)
  15002. adcxq %rax, %r10
  15003. adoxq %rcx, %r8
  15004. movq %r10, 40(%rbx)
  15005. movq 56(%rbx), %r9
  15006. movq 64(%rbx), %r10
  15007. # A[4] * B[2]
  15008. mulx 16(%rbp), %rax, %rcx
  15009. adcxq %rax, %r8
  15010. adoxq %rcx, %r9
  15011. # A[4] * B[3]
  15012. mulx 24(%rbp), %rax, %rcx
  15013. movq %r8, 48(%rbx)
  15014. adcxq %rax, %r9
  15015. adoxq %rcx, %r10
  15016. movq %r9, 56(%rbx)
  15017. movq 72(%rbx), %r8
  15018. movq 80(%rbx), %r9
  15019. # A[4] * B[4]
  15020. mulx 32(%rbp), %rax, %rcx
  15021. adcxq %rax, %r10
  15022. adoxq %rcx, %r8
  15023. # A[4] * B[5]
  15024. mulx 40(%rbp), %rax, %rcx
  15025. movq %r10, 64(%rbx)
  15026. adcxq %rax, %r8
  15027. adoxq %rcx, %r9
  15028. movq %r8, 72(%rbx)
  15029. movq 88(%rbx), %r10
  15030. movq 96(%rdi), %r8
  15031. # A[4] * B[6]
  15032. mulx 48(%rbp), %rax, %rcx
  15033. adcxq %rax, %r9
  15034. adoxq %rcx, %r10
  15035. # A[4] * B[7]
  15036. mulx 56(%rbp), %rax, %rcx
  15037. movq %r9, 80(%rbx)
  15038. adcxq %rax, %r10
  15039. adoxq %rcx, %r8
  15040. movq %r10, 88(%rbx)
  15041. movq 104(%rdi), %r9
  15042. movq 112(%rdi), %r10
  15043. # A[4] * B[8]
  15044. mulx 64(%rbp), %rax, %rcx
  15045. adcxq %rax, %r8
  15046. adoxq %rcx, %r9
  15047. # A[4] * B[9]
  15048. mulx 72(%rbp), %rax, %rcx
  15049. movq %r8, 96(%rdi)
  15050. adcxq %rax, %r9
  15051. adoxq %rcx, %r10
  15052. movq %r9, 104(%rdi)
  15053. movq 120(%rdi), %r8
  15054. # A[4] * B[10]
  15055. mulx 80(%rbp), %rax, %rcx
  15056. adcxq %rax, %r10
  15057. adoxq %rcx, %r8
  15058. # A[4] * B[11]
  15059. mulx 88(%rbp), %rax, %rcx
  15060. movq %r10, 112(%rdi)
  15061. movq %r12, %r9
  15062. adcxq %rax, %r8
  15063. adoxq %rcx, %r9
  15064. adcxq %r11, %r9
  15065. movq %r12, %r11
  15066. adoxq %r12, %r11
  15067. adcxq %r12, %r11
  15068. movq %r8, 120(%rdi)
  15069. movq %r9, 128(%rdi)
  15070. movq 40(%rsi), %rdx
  15071. movq 40(%rbx), %r10
  15072. movq 48(%rbx), %r8
  15073. movq 56(%rbx), %r9
  15074. # A[5] * B[0]
  15075. mulx (%rbp), %rax, %rcx
  15076. adcxq %rax, %r10
  15077. adoxq %rcx, %r8
  15078. # A[5] * B[1]
  15079. mulx 8(%rbp), %rax, %rcx
  15080. movq %r10, 40(%rbx)
  15081. adcxq %rax, %r8
  15082. adoxq %rcx, %r9
  15083. movq %r8, 48(%rbx)
  15084. movq 64(%rbx), %r10
  15085. movq 72(%rbx), %r8
  15086. # A[5] * B[2]
  15087. mulx 16(%rbp), %rax, %rcx
  15088. adcxq %rax, %r9
  15089. adoxq %rcx, %r10
  15090. # A[5] * B[3]
  15091. mulx 24(%rbp), %rax, %rcx
  15092. movq %r9, 56(%rbx)
  15093. adcxq %rax, %r10
  15094. adoxq %rcx, %r8
  15095. movq %r10, 64(%rbx)
  15096. movq 80(%rbx), %r9
  15097. movq 88(%rbx), %r10
  15098. # A[5] * B[4]
  15099. mulx 32(%rbp), %rax, %rcx
  15100. adcxq %rax, %r8
  15101. adoxq %rcx, %r9
  15102. # A[5] * B[5]
  15103. mulx 40(%rbp), %rax, %rcx
  15104. movq %r8, 72(%rbx)
  15105. adcxq %rax, %r9
  15106. adoxq %rcx, %r10
  15107. movq %r9, 80(%rbx)
  15108. movq 96(%rdi), %r8
  15109. movq 104(%rdi), %r9
  15110. # A[5] * B[6]
  15111. mulx 48(%rbp), %rax, %rcx
  15112. adcxq %rax, %r10
  15113. adoxq %rcx, %r8
  15114. # A[5] * B[7]
  15115. mulx 56(%rbp), %rax, %rcx
  15116. movq %r10, 88(%rbx)
  15117. adcxq %rax, %r8
  15118. adoxq %rcx, %r9
  15119. movq %r8, 96(%rdi)
  15120. movq 112(%rdi), %r10
  15121. movq 120(%rdi), %r8
  15122. # A[5] * B[8]
  15123. mulx 64(%rbp), %rax, %rcx
  15124. adcxq %rax, %r9
  15125. adoxq %rcx, %r10
  15126. # A[5] * B[9]
  15127. mulx 72(%rbp), %rax, %rcx
  15128. movq %r9, 104(%rdi)
  15129. adcxq %rax, %r10
  15130. adoxq %rcx, %r8
  15131. movq %r10, 112(%rdi)
  15132. movq 128(%rdi), %r9
  15133. # A[5] * B[10]
  15134. mulx 80(%rbp), %rax, %rcx
  15135. adcxq %rax, %r8
  15136. adoxq %rcx, %r9
  15137. # A[5] * B[11]
  15138. mulx 88(%rbp), %rax, %rcx
  15139. movq %r8, 120(%rdi)
  15140. movq %r12, %r10
  15141. adcxq %rax, %r9
  15142. adoxq %rcx, %r10
  15143. adcxq %r11, %r10
  15144. movq %r12, %r11
  15145. adoxq %r12, %r11
  15146. adcxq %r12, %r11
  15147. movq %r9, 128(%rdi)
  15148. movq %r10, 136(%rdi)
  15149. movq 48(%rsi), %rdx
  15150. movq 48(%rbx), %r8
  15151. movq 56(%rbx), %r9
  15152. movq 64(%rbx), %r10
  15153. # A[6] * B[0]
  15154. mulx (%rbp), %rax, %rcx
  15155. adcxq %rax, %r8
  15156. adoxq %rcx, %r9
  15157. # A[6] * B[1]
  15158. mulx 8(%rbp), %rax, %rcx
  15159. movq %r8, 48(%rbx)
  15160. adcxq %rax, %r9
  15161. adoxq %rcx, %r10
  15162. movq %r9, 56(%rbx)
  15163. movq 72(%rbx), %r8
  15164. movq 80(%rbx), %r9
  15165. # A[6] * B[2]
  15166. mulx 16(%rbp), %rax, %rcx
  15167. adcxq %rax, %r10
  15168. adoxq %rcx, %r8
  15169. # A[6] * B[3]
  15170. mulx 24(%rbp), %rax, %rcx
  15171. movq %r10, 64(%rbx)
  15172. adcxq %rax, %r8
  15173. adoxq %rcx, %r9
  15174. movq %r8, 72(%rbx)
  15175. movq 88(%rbx), %r10
  15176. movq 96(%rdi), %r8
  15177. # A[6] * B[4]
  15178. mulx 32(%rbp), %rax, %rcx
  15179. adcxq %rax, %r9
  15180. adoxq %rcx, %r10
  15181. # A[6] * B[5]
  15182. mulx 40(%rbp), %rax, %rcx
  15183. movq %r9, 80(%rbx)
  15184. adcxq %rax, %r10
  15185. adoxq %rcx, %r8
  15186. movq %r10, 88(%rbx)
  15187. movq 104(%rdi), %r9
  15188. movq 112(%rdi), %r10
  15189. # A[6] * B[6]
  15190. mulx 48(%rbp), %rax, %rcx
  15191. adcxq %rax, %r8
  15192. adoxq %rcx, %r9
  15193. # A[6] * B[7]
  15194. mulx 56(%rbp), %rax, %rcx
  15195. movq %r8, 96(%rdi)
  15196. adcxq %rax, %r9
  15197. adoxq %rcx, %r10
  15198. movq %r9, 104(%rdi)
  15199. movq 120(%rdi), %r8
  15200. movq 128(%rdi), %r9
  15201. # A[6] * B[8]
  15202. mulx 64(%rbp), %rax, %rcx
  15203. adcxq %rax, %r10
  15204. adoxq %rcx, %r8
  15205. # A[6] * B[9]
  15206. mulx 72(%rbp), %rax, %rcx
  15207. movq %r10, 112(%rdi)
  15208. adcxq %rax, %r8
  15209. adoxq %rcx, %r9
  15210. movq %r8, 120(%rdi)
  15211. movq 136(%rdi), %r10
  15212. # A[6] * B[10]
  15213. mulx 80(%rbp), %rax, %rcx
  15214. adcxq %rax, %r9
  15215. adoxq %rcx, %r10
  15216. # A[6] * B[11]
  15217. mulx 88(%rbp), %rax, %rcx
  15218. movq %r9, 128(%rdi)
  15219. movq %r12, %r8
  15220. adcxq %rax, %r10
  15221. adoxq %rcx, %r8
  15222. adcxq %r11, %r8
  15223. movq %r12, %r11
  15224. adoxq %r12, %r11
  15225. adcxq %r12, %r11
  15226. movq %r10, 136(%rdi)
  15227. movq %r8, 144(%rdi)
  15228. movq 56(%rsi), %rdx
  15229. movq 56(%rbx), %r9
  15230. movq 64(%rbx), %r10
  15231. movq 72(%rbx), %r8
  15232. # A[7] * B[0]
  15233. mulx (%rbp), %rax, %rcx
  15234. adcxq %rax, %r9
  15235. adoxq %rcx, %r10
  15236. # A[7] * B[1]
  15237. mulx 8(%rbp), %rax, %rcx
  15238. movq %r9, 56(%rbx)
  15239. adcxq %rax, %r10
  15240. adoxq %rcx, %r8
  15241. movq %r10, 64(%rbx)
  15242. movq 80(%rbx), %r9
  15243. movq 88(%rbx), %r10
  15244. # A[7] * B[2]
  15245. mulx 16(%rbp), %rax, %rcx
  15246. adcxq %rax, %r8
  15247. adoxq %rcx, %r9
  15248. # A[7] * B[3]
  15249. mulx 24(%rbp), %rax, %rcx
  15250. movq %r8, 72(%rbx)
  15251. adcxq %rax, %r9
  15252. adoxq %rcx, %r10
  15253. movq %r9, 80(%rbx)
  15254. movq 96(%rdi), %r8
  15255. movq 104(%rdi), %r9
  15256. # A[7] * B[4]
  15257. mulx 32(%rbp), %rax, %rcx
  15258. adcxq %rax, %r10
  15259. adoxq %rcx, %r8
  15260. # A[7] * B[5]
  15261. mulx 40(%rbp), %rax, %rcx
  15262. movq %r10, 88(%rbx)
  15263. adcxq %rax, %r8
  15264. adoxq %rcx, %r9
  15265. movq %r8, 96(%rdi)
  15266. movq 112(%rdi), %r10
  15267. movq 120(%rdi), %r8
  15268. # A[7] * B[6]
  15269. mulx 48(%rbp), %rax, %rcx
  15270. adcxq %rax, %r9
  15271. adoxq %rcx, %r10
  15272. # A[7] * B[7]
  15273. mulx 56(%rbp), %rax, %rcx
  15274. movq %r9, 104(%rdi)
  15275. adcxq %rax, %r10
  15276. adoxq %rcx, %r8
  15277. movq %r10, 112(%rdi)
  15278. movq 128(%rdi), %r9
  15279. movq 136(%rdi), %r10
  15280. # A[7] * B[8]
  15281. mulx 64(%rbp), %rax, %rcx
  15282. adcxq %rax, %r8
  15283. adoxq %rcx, %r9
  15284. # A[7] * B[9]
  15285. mulx 72(%rbp), %rax, %rcx
  15286. movq %r8, 120(%rdi)
  15287. adcxq %rax, %r9
  15288. adoxq %rcx, %r10
  15289. movq %r9, 128(%rdi)
  15290. movq 144(%rdi), %r8
  15291. # A[7] * B[10]
  15292. mulx 80(%rbp), %rax, %rcx
  15293. adcxq %rax, %r10
  15294. adoxq %rcx, %r8
  15295. # A[7] * B[11]
  15296. mulx 88(%rbp), %rax, %rcx
  15297. movq %r10, 136(%rdi)
  15298. movq %r12, %r9
  15299. adcxq %rax, %r8
  15300. adoxq %rcx, %r9
  15301. adcxq %r11, %r9
  15302. movq %r12, %r11
  15303. adoxq %r12, %r11
  15304. adcxq %r12, %r11
  15305. movq %r8, 144(%rdi)
  15306. movq %r9, 152(%rdi)
  15307. movq 64(%rsi), %rdx
  15308. movq 64(%rbx), %r10
  15309. movq 72(%rbx), %r8
  15310. movq 80(%rbx), %r9
  15311. # A[8] * B[0]
  15312. mulx (%rbp), %rax, %rcx
  15313. adcxq %rax, %r10
  15314. adoxq %rcx, %r8
  15315. # A[8] * B[1]
  15316. mulx 8(%rbp), %rax, %rcx
  15317. movq %r10, 64(%rbx)
  15318. adcxq %rax, %r8
  15319. adoxq %rcx, %r9
  15320. movq %r8, 72(%rbx)
  15321. movq 88(%rbx), %r10
  15322. movq 96(%rdi), %r8
  15323. # A[8] * B[2]
  15324. mulx 16(%rbp), %rax, %rcx
  15325. adcxq %rax, %r9
  15326. adoxq %rcx, %r10
  15327. # A[8] * B[3]
  15328. mulx 24(%rbp), %rax, %rcx
  15329. movq %r9, 80(%rbx)
  15330. adcxq %rax, %r10
  15331. adoxq %rcx, %r8
  15332. movq %r10, 88(%rbx)
  15333. movq 104(%rdi), %r9
  15334. movq 112(%rdi), %r10
  15335. # A[8] * B[4]
  15336. mulx 32(%rbp), %rax, %rcx
  15337. adcxq %rax, %r8
  15338. adoxq %rcx, %r9
  15339. # A[8] * B[5]
  15340. mulx 40(%rbp), %rax, %rcx
  15341. movq %r8, 96(%rdi)
  15342. adcxq %rax, %r9
  15343. adoxq %rcx, %r10
  15344. movq %r9, 104(%rdi)
  15345. movq 120(%rdi), %r8
  15346. movq 128(%rdi), %r9
  15347. # A[8] * B[6]
  15348. mulx 48(%rbp), %rax, %rcx
  15349. adcxq %rax, %r10
  15350. adoxq %rcx, %r8
  15351. # A[8] * B[7]
  15352. mulx 56(%rbp), %rax, %rcx
  15353. movq %r10, 112(%rdi)
  15354. adcxq %rax, %r8
  15355. adoxq %rcx, %r9
  15356. movq %r8, 120(%rdi)
  15357. movq 136(%rdi), %r10
  15358. movq 144(%rdi), %r8
  15359. # A[8] * B[8]
  15360. mulx 64(%rbp), %rax, %rcx
  15361. adcxq %rax, %r9
  15362. adoxq %rcx, %r10
  15363. # A[8] * B[9]
  15364. mulx 72(%rbp), %rax, %rcx
  15365. movq %r9, 128(%rdi)
  15366. adcxq %rax, %r10
  15367. adoxq %rcx, %r8
  15368. movq %r10, 136(%rdi)
  15369. movq 152(%rdi), %r9
  15370. # A[8] * B[10]
  15371. mulx 80(%rbp), %rax, %rcx
  15372. adcxq %rax, %r8
  15373. adoxq %rcx, %r9
  15374. # A[8] * B[11]
  15375. mulx 88(%rbp), %rax, %rcx
  15376. movq %r8, 144(%rdi)
  15377. movq %r12, %r10
  15378. adcxq %rax, %r9
  15379. adoxq %rcx, %r10
  15380. adcxq %r11, %r10
  15381. movq %r12, %r11
  15382. adoxq %r12, %r11
  15383. adcxq %r12, %r11
  15384. movq %r9, 152(%rdi)
  15385. movq %r10, 160(%rdi)
  15386. movq 72(%rsi), %rdx
  15387. movq 72(%rbx), %r8
  15388. movq 80(%rbx), %r9
  15389. movq 88(%rbx), %r10
  15390. # A[9] * B[0]
  15391. mulx (%rbp), %rax, %rcx
  15392. adcxq %rax, %r8
  15393. adoxq %rcx, %r9
  15394. # A[9] * B[1]
  15395. mulx 8(%rbp), %rax, %rcx
  15396. movq %r8, 72(%rbx)
  15397. adcxq %rax, %r9
  15398. adoxq %rcx, %r10
  15399. movq %r9, 80(%rbx)
  15400. movq 96(%rdi), %r8
  15401. movq 104(%rdi), %r9
  15402. # A[9] * B[2]
  15403. mulx 16(%rbp), %rax, %rcx
  15404. adcxq %rax, %r10
  15405. adoxq %rcx, %r8
  15406. # A[9] * B[3]
  15407. mulx 24(%rbp), %rax, %rcx
  15408. movq %r10, 88(%rbx)
  15409. adcxq %rax, %r8
  15410. adoxq %rcx, %r9
  15411. movq %r8, 96(%rdi)
  15412. movq 112(%rdi), %r10
  15413. movq 120(%rdi), %r8
  15414. # A[9] * B[4]
  15415. mulx 32(%rbp), %rax, %rcx
  15416. adcxq %rax, %r9
  15417. adoxq %rcx, %r10
  15418. # A[9] * B[5]
  15419. mulx 40(%rbp), %rax, %rcx
  15420. movq %r9, 104(%rdi)
  15421. adcxq %rax, %r10
  15422. adoxq %rcx, %r8
  15423. movq %r10, 112(%rdi)
  15424. movq 128(%rdi), %r9
  15425. movq 136(%rdi), %r10
  15426. # A[9] * B[6]
  15427. mulx 48(%rbp), %rax, %rcx
  15428. adcxq %rax, %r8
  15429. adoxq %rcx, %r9
  15430. # A[9] * B[7]
  15431. mulx 56(%rbp), %rax, %rcx
  15432. movq %r8, 120(%rdi)
  15433. adcxq %rax, %r9
  15434. adoxq %rcx, %r10
  15435. movq %r9, 128(%rdi)
  15436. movq 144(%rdi), %r8
  15437. movq 152(%rdi), %r9
  15438. # A[9] * B[8]
  15439. mulx 64(%rbp), %rax, %rcx
  15440. adcxq %rax, %r10
  15441. adoxq %rcx, %r8
  15442. # A[9] * B[9]
  15443. mulx 72(%rbp), %rax, %rcx
  15444. movq %r10, 136(%rdi)
  15445. adcxq %rax, %r8
  15446. adoxq %rcx, %r9
  15447. movq %r8, 144(%rdi)
  15448. movq 160(%rdi), %r10
  15449. # A[9] * B[10]
  15450. mulx 80(%rbp), %rax, %rcx
  15451. adcxq %rax, %r9
  15452. adoxq %rcx, %r10
  15453. # A[9] * B[11]
  15454. mulx 88(%rbp), %rax, %rcx
  15455. movq %r9, 152(%rdi)
  15456. movq %r12, %r8
  15457. adcxq %rax, %r10
  15458. adoxq %rcx, %r8
  15459. adcxq %r11, %r8
  15460. movq %r12, %r11
  15461. adoxq %r12, %r11
  15462. adcxq %r12, %r11
  15463. movq %r10, 160(%rdi)
  15464. movq %r8, 168(%rdi)
  15465. movq 80(%rsi), %rdx
  15466. movq 80(%rbx), %r9
  15467. movq 88(%rbx), %r10
  15468. movq 96(%rdi), %r8
  15469. # A[10] * B[0]
  15470. mulx (%rbp), %rax, %rcx
  15471. adcxq %rax, %r9
  15472. adoxq %rcx, %r10
  15473. # A[10] * B[1]
  15474. mulx 8(%rbp), %rax, %rcx
  15475. movq %r9, 80(%rbx)
  15476. adcxq %rax, %r10
  15477. adoxq %rcx, %r8
  15478. movq %r10, 88(%rbx)
  15479. movq 104(%rdi), %r9
  15480. movq 112(%rdi), %r10
  15481. # A[10] * B[2]
  15482. mulx 16(%rbp), %rax, %rcx
  15483. adcxq %rax, %r8
  15484. adoxq %rcx, %r9
  15485. # A[10] * B[3]
  15486. mulx 24(%rbp), %rax, %rcx
  15487. movq %r8, 96(%rdi)
  15488. adcxq %rax, %r9
  15489. adoxq %rcx, %r10
  15490. movq %r9, 104(%rdi)
  15491. movq 120(%rdi), %r8
  15492. movq 128(%rdi), %r9
  15493. # A[10] * B[4]
  15494. mulx 32(%rbp), %rax, %rcx
  15495. adcxq %rax, %r10
  15496. adoxq %rcx, %r8
  15497. # A[10] * B[5]
  15498. mulx 40(%rbp), %rax, %rcx
  15499. movq %r10, 112(%rdi)
  15500. adcxq %rax, %r8
  15501. adoxq %rcx, %r9
  15502. movq %r8, 120(%rdi)
  15503. movq 136(%rdi), %r10
  15504. movq 144(%rdi), %r8
  15505. # A[10] * B[6]
  15506. mulx 48(%rbp), %rax, %rcx
  15507. adcxq %rax, %r9
  15508. adoxq %rcx, %r10
  15509. # A[10] * B[7]
  15510. mulx 56(%rbp), %rax, %rcx
  15511. movq %r9, 128(%rdi)
  15512. adcxq %rax, %r10
  15513. adoxq %rcx, %r8
  15514. movq %r10, 136(%rdi)
  15515. movq 152(%rdi), %r9
  15516. movq 160(%rdi), %r10
  15517. # A[10] * B[8]
  15518. mulx 64(%rbp), %rax, %rcx
  15519. adcxq %rax, %r8
  15520. adoxq %rcx, %r9
  15521. # A[10] * B[9]
  15522. mulx 72(%rbp), %rax, %rcx
  15523. movq %r8, 144(%rdi)
  15524. adcxq %rax, %r9
  15525. adoxq %rcx, %r10
  15526. movq %r9, 152(%rdi)
  15527. movq 168(%rdi), %r8
  15528. # A[10] * B[10]
  15529. mulx 80(%rbp), %rax, %rcx
  15530. adcxq %rax, %r10
  15531. adoxq %rcx, %r8
  15532. # A[10] * B[11]
  15533. mulx 88(%rbp), %rax, %rcx
  15534. movq %r10, 160(%rdi)
  15535. movq %r12, %r9
  15536. adcxq %rax, %r8
  15537. adoxq %rcx, %r9
  15538. adcxq %r11, %r9
  15539. movq %r12, %r11
  15540. adoxq %r12, %r11
  15541. adcxq %r12, %r11
  15542. movq %r8, 168(%rdi)
  15543. movq %r9, 176(%rdi)
  15544. movq 88(%rsi), %rdx
  15545. movq 88(%rbx), %r10
  15546. movq 96(%rdi), %r8
  15547. movq 104(%rdi), %r9
  15548. # A[11] * B[0]
  15549. mulx (%rbp), %rax, %rcx
  15550. adcxq %rax, %r10
  15551. adoxq %rcx, %r8
  15552. # A[11] * B[1]
  15553. mulx 8(%rbp), %rax, %rcx
  15554. movq %r10, 88(%rbx)
  15555. adcxq %rax, %r8
  15556. adoxq %rcx, %r9
  15557. movq %r8, 96(%rdi)
  15558. movq 112(%rdi), %r10
  15559. movq 120(%rdi), %r8
  15560. # A[11] * B[2]
  15561. mulx 16(%rbp), %rax, %rcx
  15562. adcxq %rax, %r9
  15563. adoxq %rcx, %r10
  15564. # A[11] * B[3]
  15565. mulx 24(%rbp), %rax, %rcx
  15566. movq %r9, 104(%rdi)
  15567. adcxq %rax, %r10
  15568. adoxq %rcx, %r8
  15569. movq %r10, 112(%rdi)
  15570. movq 128(%rdi), %r9
  15571. movq 136(%rdi), %r10
  15572. # A[11] * B[4]
  15573. mulx 32(%rbp), %rax, %rcx
  15574. adcxq %rax, %r8
  15575. adoxq %rcx, %r9
  15576. # A[11] * B[5]
  15577. mulx 40(%rbp), %rax, %rcx
  15578. movq %r8, 120(%rdi)
  15579. adcxq %rax, %r9
  15580. adoxq %rcx, %r10
  15581. movq %r9, 128(%rdi)
  15582. movq 144(%rdi), %r8
  15583. movq 152(%rdi), %r9
  15584. # A[11] * B[6]
  15585. mulx 48(%rbp), %rax, %rcx
  15586. adcxq %rax, %r10
  15587. adoxq %rcx, %r8
  15588. # A[11] * B[7]
  15589. mulx 56(%rbp), %rax, %rcx
  15590. movq %r10, 136(%rdi)
  15591. adcxq %rax, %r8
  15592. adoxq %rcx, %r9
  15593. movq %r8, 144(%rdi)
  15594. movq 160(%rdi), %r10
  15595. movq 168(%rdi), %r8
  15596. # A[11] * B[8]
  15597. mulx 64(%rbp), %rax, %rcx
  15598. adcxq %rax, %r9
  15599. adoxq %rcx, %r10
  15600. # A[11] * B[9]
  15601. mulx 72(%rbp), %rax, %rcx
  15602. movq %r9, 152(%rdi)
  15603. adcxq %rax, %r10
  15604. adoxq %rcx, %r8
  15605. movq %r10, 160(%rdi)
  15606. movq 176(%rdi), %r9
  15607. # A[11] * B[10]
  15608. mulx 80(%rbp), %rax, %rcx
  15609. adcxq %rax, %r8
  15610. adoxq %rcx, %r9
  15611. # A[11] * B[11]
  15612. mulx 88(%rbp), %rax, %rcx
  15613. movq %r8, 168(%rdi)
  15614. movq %r12, %r10
  15615. adcxq %rax, %r9
  15616. adoxq %rcx, %r10
  15617. adcxq %r11, %r10
  15618. movq %r9, 176(%rdi)
  15619. movq %r10, 184(%rdi)
  15620. cmpq %rdi, %rsi
  15621. je L_start_3072_mul_avx2_12
  15622. cmpq %rdi, %rbp
  15623. jne L_end_3072_mul_avx2_12
  15624. L_start_3072_mul_avx2_12:
  15625. vmovdqu (%rbx), %xmm0
  15626. vmovups %xmm0, (%rdi)
  15627. vmovdqu 16(%rbx), %xmm0
  15628. vmovups %xmm0, 16(%rdi)
  15629. vmovdqu 32(%rbx), %xmm0
  15630. vmovups %xmm0, 32(%rdi)
  15631. vmovdqu 48(%rbx), %xmm0
  15632. vmovups %xmm0, 48(%rdi)
  15633. vmovdqu 64(%rbx), %xmm0
  15634. vmovups %xmm0, 64(%rdi)
  15635. vmovdqu 80(%rbx), %xmm0
  15636. vmovups %xmm0, 80(%rdi)
  15637. L_end_3072_mul_avx2_12:
  15638. addq $0x60, %rsp
  15639. popq %r12
  15640. popq %rbp
  15641. popq %rbx
  15642. repz retq
  15643. #ifndef __APPLE__
  15644. .size sp_3072_mul_avx2_12,.-sp_3072_mul_avx2_12
  15645. #endif /* __APPLE__ */
  15646. #endif /* HAVE_INTEL_AVX2 */
  15647. #ifdef HAVE_INTEL_AVX2
  15648. /* Square a and put result in r. (r = a * a)
  15649. *
  15650. * r A single precision integer; receives the result as 24 64-bit words.
  15651. * a A single precision integer of 12 64-bit words to be squared.
  15652. */
  15653. #ifndef __APPLE__
  15654. .text
  15655. .globl sp_3072_sqr_avx2_12
  15656. .type sp_3072_sqr_avx2_12,@function
  15657. .align 16
  15658. sp_3072_sqr_avx2_12:
  15659. #else
  15660. .section __TEXT,__text
  15661. .globl _sp_3072_sqr_avx2_12
  15662. .p2align 4
  15663. _sp_3072_sqr_avx2_12:
  15664. #endif /* __APPLE__ */
  15665. pushq %rbp
  15666. pushq %r12
  15667. pushq %r13
  15668. pushq %r14
  15669. pushq %r15
  15670. pushq %rbx
  15671. subq $0x60, %rsp
  15672. cmpq %rdi, %rsi
  15673. movq %rsp, %rbp
  15674. cmovne %rdi, %rbp
  15675. xorq %r10, %r10
  15676. # Diagonal 1
  15677. # A[1] x A[0]
  15678. movq (%rsi), %rdx
  15679. mulxq 8(%rsi), %r8, %r9
  15680. movq %r8, 8(%rbp)
  15681. movq %r10, %r8
  15682. # A[2] x A[0]
  15683. mulxq 16(%rsi), %rax, %rcx
  15684. adcxq %rax, %r9
  15685. adoxq %rcx, %r8
  15686. movq %r9, 16(%rbp)
  15687. movq %r10, %r9
  15688. # A[3] x A[0]
  15689. mulxq 24(%rsi), %rax, %rcx
  15690. adcxq %rax, %r8
  15691. adoxq %rcx, %r9
  15692. movq %r8, 24(%rbp)
  15693. movq %r10, %r8
  15694. # A[4] x A[0]
  15695. mulxq 32(%rsi), %rax, %rcx
  15696. adcxq %rax, %r9
  15697. adoxq %rcx, %r8
  15698. movq %r9, 32(%rbp)
  15699. movq %r10, %r9
  15700. # A[5] x A[0]
  15701. mulxq 40(%rsi), %rax, %rcx
  15702. adcxq %rax, %r8
  15703. adoxq %rcx, %r9
  15704. movq %r8, 40(%rbp)
  15705. movq %r10, %r8
  15706. # A[6] x A[0]
  15707. mulxq 48(%rsi), %rax, %rcx
  15708. adcxq %rax, %r9
  15709. adoxq %rcx, %r8
  15710. movq %r9, 48(%rbp)
  15711. movq %r10, %r9
  15712. # A[7] x A[0]
  15713. mulxq 56(%rsi), %rax, %rcx
  15714. adcxq %rax, %r8
  15715. adoxq %rcx, %r9
  15716. movq %r8, %r12
  15717. movq %r10, %r8
  15718. # A[8] x A[0]
  15719. mulxq 64(%rsi), %rax, %rcx
  15720. adcxq %rax, %r9
  15721. adoxq %rcx, %r8
  15722. movq %r9, %r13
  15723. movq %r10, %r9
  15724. # A[9] x A[0]
  15725. mulxq 72(%rsi), %rax, %rcx
  15726. adcxq %rax, %r8
  15727. adoxq %rcx, %r9
  15728. movq %r8, %r14
  15729. movq %r10, %r8
  15730. # A[10] x A[0]
  15731. mulxq 80(%rsi), %rax, %rcx
  15732. adcxq %rax, %r9
  15733. adoxq %rcx, %r8
  15734. movq %r9, %r15
  15735. movq %r10, %r9
  15736. # A[11] x A[0]
  15737. mulxq 88(%rsi), %rax, %rcx
  15738. adcxq %rax, %r8
  15739. adoxq %rcx, %r9
  15740. movq %r8, %rbx
  15741. # Carry
  15742. adcxq %r10, %r9
  15743. movq %r10, %r11
  15744. adcxq %r10, %r11
  15745. adoxq %r10, %r11
  15746. movq %r9, 96(%rdi)
  15747. # Diagonal 2
  15748. movq 24(%rbp), %r9
  15749. movq 32(%rbp), %r8
  15750. # A[2] x A[1]
  15751. movq 8(%rsi), %rdx
  15752. mulxq 16(%rsi), %rax, %rcx
  15753. adcxq %rax, %r9
  15754. adoxq %rcx, %r8
  15755. movq %r9, 24(%rbp)
  15756. movq 40(%rbp), %r9
  15757. # A[3] x A[1]
  15758. mulxq 24(%rsi), %rax, %rcx
  15759. adcxq %rax, %r8
  15760. adoxq %rcx, %r9
  15761. movq %r8, 32(%rbp)
  15762. movq 48(%rbp), %r8
  15763. # A[4] x A[1]
  15764. mulxq 32(%rsi), %rax, %rcx
  15765. adcxq %rax, %r9
  15766. adoxq %rcx, %r8
  15767. movq %r9, 40(%rbp)
  15768. # No load %r12 - %r9
  15769. # A[5] x A[1]
  15770. mulxq 40(%rsi), %rax, %rcx
  15771. adcxq %rax, %r8
  15772. adoxq %rcx, %r12
  15773. movq %r8, 48(%rbp)
  15774. # No load %r13 - %r8
  15775. # A[6] x A[1]
  15776. mulxq 48(%rsi), %rax, %rcx
  15777. adcxq %rax, %r12
  15778. adoxq %rcx, %r13
  15779. # No store %r12
  15780. # No load %r14 - %r9
  15781. # A[7] x A[1]
  15782. mulxq 56(%rsi), %rax, %rcx
  15783. adcxq %rax, %r13
  15784. adoxq %rcx, %r14
  15785. # No store %r13
  15786. # No load %r15 - %r8
  15787. # A[8] x A[1]
  15788. mulxq 64(%rsi), %rax, %rcx
  15789. adcxq %rax, %r14
  15790. adoxq %rcx, %r15
  15791. # No store %r14
  15792. # No load %rbx - %r9
  15793. # A[9] x A[1]
  15794. mulxq 72(%rsi), %rax, %rcx
  15795. adcxq %rax, %r15
  15796. adoxq %rcx, %rbx
  15797. # No store %r15
  15798. movq 96(%rdi), %r8
  15799. # A[10] x A[1]
  15800. mulxq 80(%rsi), %rax, %rcx
  15801. adcxq %rax, %rbx
  15802. adoxq %rcx, %r8
  15803. # No store %rbx
  15804. movq %r10, %r9
  15805. # A[11] x A[1]
  15806. mulxq 88(%rsi), %rax, %rcx
  15807. adcxq %rax, %r8
  15808. adoxq %rcx, %r9
  15809. movq %r8, 96(%rdi)
  15810. movq %r10, %r8
  15811. # A[11] x A[2]
  15812. movq 16(%rsi), %rdx
  15813. mulxq 88(%rsi), %rax, %rcx
  15814. adcxq %rax, %r9
  15815. adoxq %rcx, %r8
  15816. movq %r9, 104(%rdi)
  15817. # Carry
  15818. adcxq %r11, %r8
  15819. movq %r10, %r11
  15820. adcxq %r10, %r11
  15821. adoxq %r10, %r11
  15822. movq %r8, 112(%rdi)
  15823. # Diagonal 3
  15824. movq 40(%rbp), %r8
  15825. movq 48(%rbp), %r9
  15826. # A[3] x A[2]
  15827. mulxq 24(%rsi), %rax, %rcx
  15828. adcxq %rax, %r8
  15829. adoxq %rcx, %r9
  15830. movq %r8, 40(%rbp)
  15831. # No load %r12 - %r8
  15832. # A[4] x A[2]
  15833. mulxq 32(%rsi), %rax, %rcx
  15834. adcxq %rax, %r9
  15835. adoxq %rcx, %r12
  15836. movq %r9, 48(%rbp)
  15837. # No load %r13 - %r9
  15838. # A[5] x A[2]
  15839. mulxq 40(%rsi), %rax, %rcx
  15840. adcxq %rax, %r12
  15841. adoxq %rcx, %r13
  15842. # No store %r12
  15843. # No load %r14 - %r8
  15844. # A[6] x A[2]
  15845. mulxq 48(%rsi), %rax, %rcx
  15846. adcxq %rax, %r13
  15847. adoxq %rcx, %r14
  15848. # No store %r13
  15849. # No load %r15 - %r9
  15850. # A[7] x A[2]
  15851. mulxq 56(%rsi), %rax, %rcx
  15852. adcxq %rax, %r14
  15853. adoxq %rcx, %r15
  15854. # No store %r14
  15855. # No load %rbx - %r8
  15856. # A[8] x A[2]
  15857. mulxq 64(%rsi), %rax, %rcx
  15858. adcxq %rax, %r15
  15859. adoxq %rcx, %rbx
  15860. # No store %r15
  15861. movq 96(%rdi), %r9
  15862. # A[9] x A[2]
  15863. mulxq 72(%rsi), %rax, %rcx
  15864. adcxq %rax, %rbx
  15865. adoxq %rcx, %r9
  15866. # No store %rbx
  15867. movq 104(%rdi), %r8
  15868. # A[10] x A[2]
  15869. mulxq 80(%rsi), %rax, %rcx
  15870. adcxq %rax, %r9
  15871. adoxq %rcx, %r8
  15872. movq %r9, 96(%rdi)
  15873. movq 112(%rdi), %r9
  15874. # A[10] x A[3]
  15875. movq 80(%rsi), %rdx
  15876. mulxq 24(%rsi), %rax, %rcx
  15877. adcxq %rax, %r8
  15878. adoxq %rcx, %r9
  15879. movq %r8, 104(%rdi)
  15880. movq %r10, %r8
  15881. # A[10] x A[4]
  15882. mulxq 32(%rsi), %rax, %rcx
  15883. adcxq %rax, %r9
  15884. adoxq %rcx, %r8
  15885. movq %r9, 112(%rdi)
  15886. movq %r10, %r9
  15887. # A[10] x A[5]
  15888. mulxq 40(%rsi), %rax, %rcx
  15889. adcxq %rax, %r8
  15890. adoxq %rcx, %r9
  15891. movq %r8, 120(%rdi)
  15892. # Carry
  15893. adcxq %r11, %r9
  15894. movq %r10, %r11
  15895. adcxq %r10, %r11
  15896. adoxq %r10, %r11
  15897. movq %r9, 128(%rdi)
  15898. # Diagonal 4
  15899. # No load %r12 - %r9
  15900. # No load %r13 - %r8
  15901. # A[4] x A[3]
  15902. movq 24(%rsi), %rdx
  15903. mulxq 32(%rsi), %rax, %rcx
  15904. adcxq %rax, %r12
  15905. adoxq %rcx, %r13
  15906. # No store %r12
  15907. # No load %r14 - %r9
  15908. # A[5] x A[3]
  15909. mulxq 40(%rsi), %rax, %rcx
  15910. adcxq %rax, %r13
  15911. adoxq %rcx, %r14
  15912. # No store %r13
  15913. # No load %r15 - %r8
  15914. # A[6] x A[3]
  15915. mulxq 48(%rsi), %rax, %rcx
  15916. adcxq %rax, %r14
  15917. adoxq %rcx, %r15
  15918. # No store %r14
  15919. # No load %rbx - %r9
  15920. # A[7] x A[3]
  15921. mulxq 56(%rsi), %rax, %rcx
  15922. adcxq %rax, %r15
  15923. adoxq %rcx, %rbx
  15924. # No store %r15
  15925. movq 96(%rdi), %r8
  15926. # A[8] x A[3]
  15927. mulxq 64(%rsi), %rax, %rcx
  15928. adcxq %rax, %rbx
  15929. adoxq %rcx, %r8
  15930. # No store %rbx
  15931. movq 104(%rdi), %r9
  15932. # A[9] x A[3]
  15933. mulxq 72(%rsi), %rax, %rcx
  15934. adcxq %rax, %r8
  15935. adoxq %rcx, %r9
  15936. movq %r8, 96(%rdi)
  15937. movq 112(%rdi), %r8
  15938. # A[9] x A[4]
  15939. movq 72(%rsi), %rdx
  15940. mulxq 32(%rsi), %rax, %rcx
  15941. adcxq %rax, %r9
  15942. adoxq %rcx, %r8
  15943. movq %r9, 104(%rdi)
  15944. movq 120(%rdi), %r9
  15945. # A[9] x A[5]
  15946. mulxq 40(%rsi), %rax, %rcx
  15947. adcxq %rax, %r8
  15948. adoxq %rcx, %r9
  15949. movq %r8, 112(%rdi)
  15950. movq 128(%rdi), %r8
  15951. # A[9] x A[6]
  15952. mulxq 48(%rsi), %rax, %rcx
  15953. adcxq %rax, %r9
  15954. adoxq %rcx, %r8
  15955. movq %r9, 120(%rdi)
  15956. movq %r10, %r9
  15957. # A[9] x A[7]
  15958. mulxq 56(%rsi), %rax, %rcx
  15959. adcxq %rax, %r8
  15960. adoxq %rcx, %r9
  15961. movq %r8, 128(%rdi)
  15962. movq %r10, %r8
  15963. # A[9] x A[8]
  15964. mulxq 64(%rsi), %rax, %rcx
  15965. adcxq %rax, %r9
  15966. adoxq %rcx, %r8
  15967. movq %r9, 136(%rdi)
  15968. # Carry
  15969. adcxq %r11, %r8
  15970. movq %r10, %r11
  15971. adcxq %r10, %r11
  15972. adoxq %r10, %r11
  15973. movq %r8, 144(%rdi)
  15974. # Diagonal 5
  15975. # No load %r14 - %r8
  15976. # No load %r15 - %r9
  15977. # A[5] x A[4]
  15978. movq 32(%rsi), %rdx
  15979. mulxq 40(%rsi), %rax, %rcx
  15980. adcxq %rax, %r14
  15981. adoxq %rcx, %r15
  15982. # No store %r14
  15983. # No load %rbx - %r8
  15984. # A[6] x A[4]
  15985. mulxq 48(%rsi), %rax, %rcx
  15986. adcxq %rax, %r15
  15987. adoxq %rcx, %rbx
  15988. # No store %r15
  15989. movq 96(%rdi), %r9
  15990. # A[7] x A[4]
  15991. mulxq 56(%rsi), %rax, %rcx
  15992. adcxq %rax, %rbx
  15993. adoxq %rcx, %r9
  15994. # No store %rbx
  15995. movq 104(%rdi), %r8
  15996. # A[8] x A[4]
  15997. mulxq 64(%rsi), %rax, %rcx
  15998. adcxq %rax, %r9
  15999. adoxq %rcx, %r8
  16000. movq %r9, 96(%rdi)
  16001. movq 112(%rdi), %r9
  16002. # A[8] x A[5]
  16003. movq 64(%rsi), %rdx
  16004. mulxq 40(%rsi), %rax, %rcx
  16005. adcxq %rax, %r8
  16006. adoxq %rcx, %r9
  16007. movq %r8, 104(%rdi)
  16008. movq 120(%rdi), %r8
  16009. # A[8] x A[6]
  16010. mulxq 48(%rsi), %rax, %rcx
  16011. adcxq %rax, %r9
  16012. adoxq %rcx, %r8
  16013. movq %r9, 112(%rdi)
  16014. movq 128(%rdi), %r9
  16015. # A[8] x A[7]
  16016. mulxq 56(%rsi), %rax, %rcx
  16017. adcxq %rax, %r8
  16018. adoxq %rcx, %r9
  16019. movq %r8, 120(%rdi)
  16020. movq 136(%rdi), %r8
  16021. # A[10] x A[6]
  16022. movq 80(%rsi), %rdx
  16023. mulxq 48(%rsi), %rax, %rcx
  16024. adcxq %rax, %r9
  16025. adoxq %rcx, %r8
  16026. movq %r9, 128(%rdi)
  16027. movq 144(%rdi), %r9
  16028. # A[10] x A[7]
  16029. mulxq 56(%rsi), %rax, %rcx
  16030. adcxq %rax, %r8
  16031. adoxq %rcx, %r9
  16032. movq %r8, 136(%rdi)
  16033. movq %r10, %r8
  16034. # A[10] x A[8]
  16035. mulxq 64(%rsi), %rax, %rcx
  16036. adcxq %rax, %r9
  16037. adoxq %rcx, %r8
  16038. movq %r9, 144(%rdi)
  16039. movq %r10, %r9
  16040. # A[10] x A[9]
  16041. mulxq 72(%rsi), %rax, %rcx
  16042. adcxq %rax, %r8
  16043. adoxq %rcx, %r9
  16044. movq %r8, 152(%rdi)
  16045. # Carry
  16046. adcxq %r11, %r9
  16047. movq %r10, %r11
  16048. adcxq %r10, %r11
  16049. adoxq %r10, %r11
  16050. movq %r9, 160(%rdi)
  16051. # Diagonal 6
  16052. # No load %rbx - %r9
  16053. movq 96(%rdi), %r8
  16054. # A[6] x A[5]
  16055. movq 40(%rsi), %rdx
  16056. mulxq 48(%rsi), %rax, %rcx
  16057. adcxq %rax, %rbx
  16058. adoxq %rcx, %r8
  16059. # No store %rbx
  16060. movq 104(%rdi), %r9
  16061. # A[7] x A[5]
  16062. mulxq 56(%rsi), %rax, %rcx
  16063. adcxq %rax, %r8
  16064. adoxq %rcx, %r9
  16065. movq %r8, 96(%rdi)
  16066. movq 112(%rdi), %r8
  16067. # A[7] x A[6]
  16068. movq 48(%rsi), %rdx
  16069. mulxq 56(%rsi), %rax, %rcx
  16070. adcxq %rax, %r9
  16071. adoxq %rcx, %r8
  16072. movq %r9, 104(%rdi)
  16073. movq 120(%rdi), %r9
  16074. # A[11] x A[3]
  16075. movq 88(%rsi), %rdx
  16076. mulxq 24(%rsi), %rax, %rcx
  16077. adcxq %rax, %r8
  16078. adoxq %rcx, %r9
  16079. movq %r8, 112(%rdi)
  16080. movq 128(%rdi), %r8
  16081. # A[11] x A[4]
  16082. mulxq 32(%rsi), %rax, %rcx
  16083. adcxq %rax, %r9
  16084. adoxq %rcx, %r8
  16085. movq %r9, 120(%rdi)
  16086. movq 136(%rdi), %r9
  16087. # A[11] x A[5]
  16088. mulxq 40(%rsi), %rax, %rcx
  16089. adcxq %rax, %r8
  16090. adoxq %rcx, %r9
  16091. movq %r8, 128(%rdi)
  16092. movq 144(%rdi), %r8
  16093. # A[11] x A[6]
  16094. mulxq 48(%rsi), %rax, %rcx
  16095. adcxq %rax, %r9
  16096. adoxq %rcx, %r8
  16097. movq %r9, 136(%rdi)
  16098. movq 152(%rdi), %r9
  16099. # A[11] x A[7]
  16100. mulxq 56(%rsi), %rax, %rcx
  16101. adcxq %rax, %r8
  16102. adoxq %rcx, %r9
  16103. movq %r8, 144(%rdi)
  16104. movq 160(%rdi), %r8
  16105. # A[11] x A[8]
  16106. mulxq 64(%rsi), %rax, %rcx
  16107. adcxq %rax, %r9
  16108. adoxq %rcx, %r8
  16109. movq %r9, 152(%rdi)
  16110. movq %r10, %r9
  16111. # A[11] x A[9]
  16112. mulxq 72(%rsi), %rax, %rcx
  16113. adcxq %rax, %r8
  16114. adoxq %rcx, %r9
  16115. movq %r8, 160(%rdi)
  16116. movq %r10, %r8
  16117. # A[11] x A[10]
  16118. mulxq 80(%rsi), %rax, %rcx
  16119. adcxq %rax, %r9
  16120. adoxq %rcx, %r8
  16121. movq %r9, 168(%rdi)
  16122. # Carry
  16123. adcxq %r11, %r8
  16124. movq %r10, %r11
  16125. adcxq %r10, %r11
  16126. adoxq %r10, %r11
  16127. movq %r8, 176(%rdi)
  16128. movq %r11, 184(%rdi)
  16129. # Double and Add in A[i] x A[i]
  16130. movq 8(%rbp), %r9
  16131. # A[0] x A[0]
  16132. movq (%rsi), %rdx
  16133. mulxq %rdx, %rax, %rcx
  16134. movq %rax, (%rbp)
  16135. adoxq %r9, %r9
  16136. adcxq %rcx, %r9
  16137. movq %r9, 8(%rbp)
  16138. movq 16(%rbp), %r8
  16139. movq 24(%rbp), %r9
  16140. # A[1] x A[1]
  16141. movq 8(%rsi), %rdx
  16142. mulxq %rdx, %rax, %rcx
  16143. adoxq %r8, %r8
  16144. adoxq %r9, %r9
  16145. adcxq %rax, %r8
  16146. adcxq %rcx, %r9
  16147. movq %r8, 16(%rbp)
  16148. movq %r9, 24(%rbp)
  16149. movq 32(%rbp), %r8
  16150. movq 40(%rbp), %r9
  16151. # A[2] x A[2]
  16152. movq 16(%rsi), %rdx
  16153. mulxq %rdx, %rax, %rcx
  16154. adoxq %r8, %r8
  16155. adoxq %r9, %r9
  16156. adcxq %rax, %r8
  16157. adcxq %rcx, %r9
  16158. movq %r8, 32(%rbp)
  16159. movq %r9, 40(%rbp)
  16160. movq 48(%rbp), %r8
  16161. # A[3] x A[3]
  16162. movq 24(%rsi), %rdx
  16163. mulxq %rdx, %rax, %rcx
  16164. adoxq %r8, %r8
  16165. adoxq %r12, %r12
  16166. adcxq %rax, %r8
  16167. adcxq %rcx, %r12
  16168. movq %r8, 48(%rbp)
  16169. # A[4] x A[4]
  16170. movq 32(%rsi), %rdx
  16171. mulxq %rdx, %rax, %rcx
  16172. adoxq %r13, %r13
  16173. adoxq %r14, %r14
  16174. adcxq %rax, %r13
  16175. adcxq %rcx, %r14
  16176. # A[5] x A[5]
  16177. movq 40(%rsi), %rdx
  16178. mulxq %rdx, %rax, %rcx
  16179. adoxq %r15, %r15
  16180. adoxq %rbx, %rbx
  16181. adcxq %rax, %r15
  16182. adcxq %rcx, %rbx
  16183. movq 96(%rdi), %r8
  16184. movq 104(%rdi), %r9
  16185. # A[6] x A[6]
  16186. movq 48(%rsi), %rdx
  16187. mulxq %rdx, %rax, %rcx
  16188. adoxq %r8, %r8
  16189. adoxq %r9, %r9
  16190. adcxq %rax, %r8
  16191. adcxq %rcx, %r9
  16192. movq %r8, 96(%rdi)
  16193. movq %r9, 104(%rdi)
  16194. movq 112(%rdi), %r8
  16195. movq 120(%rdi), %r9
  16196. # A[7] x A[7]
  16197. movq 56(%rsi), %rdx
  16198. mulxq %rdx, %rax, %rcx
  16199. adoxq %r8, %r8
  16200. adoxq %r9, %r9
  16201. adcxq %rax, %r8
  16202. adcxq %rcx, %r9
  16203. movq %r8, 112(%rdi)
  16204. movq %r9, 120(%rdi)
  16205. movq 128(%rdi), %r8
  16206. movq 136(%rdi), %r9
  16207. # A[8] x A[8]
  16208. movq 64(%rsi), %rdx
  16209. mulxq %rdx, %rax, %rcx
  16210. adoxq %r8, %r8
  16211. adoxq %r9, %r9
  16212. adcxq %rax, %r8
  16213. adcxq %rcx, %r9
  16214. movq %r8, 128(%rdi)
  16215. movq %r9, 136(%rdi)
  16216. movq 144(%rdi), %r8
  16217. movq 152(%rdi), %r9
  16218. # A[9] x A[9]
  16219. movq 72(%rsi), %rdx
  16220. mulxq %rdx, %rax, %rcx
  16221. adoxq %r8, %r8
  16222. adoxq %r9, %r9
  16223. adcxq %rax, %r8
  16224. adcxq %rcx, %r9
  16225. movq %r8, 144(%rdi)
  16226. movq %r9, 152(%rdi)
  16227. movq 160(%rdi), %r8
  16228. movq 168(%rdi), %r9
  16229. # A[10] x A[10]
  16230. movq 80(%rsi), %rdx
  16231. mulxq %rdx, %rax, %rcx
  16232. adoxq %r8, %r8
  16233. adoxq %r9, %r9
  16234. adcxq %rax, %r8
  16235. adcxq %rcx, %r9
  16236. movq %r8, 160(%rdi)
  16237. movq %r9, 168(%rdi)
  16238. movq 176(%rdi), %r8
  16239. movq 184(%rdi), %r9
  16240. # A[11] x A[11]
  16241. movq 88(%rsi), %rdx
  16242. mulxq %rdx, %rax, %rcx
  16243. adoxq %r8, %r8
  16244. adoxq %r9, %r9
  16245. adcxq %rax, %r8
  16246. adcxq %rcx, %r9
  16247. movq %r8, 176(%rdi)
  16248. movq %r9, 184(%rdi)
  16249. movq %r12, 56(%rdi)
  16250. movq %r13, 64(%rdi)
  16251. movq %r14, 72(%rdi)
  16252. movq %r15, 80(%rdi)
  16253. movq %rbx, 88(%rdi)
  16254. cmpq %rdi, %rsi
  16255. jne L_end_3072_sqr_avx2_12
  16256. vmovdqu (%rbp), %xmm0
  16257. vmovups %xmm0, (%rdi)
  16258. vmovdqu 16(%rbp), %xmm0
  16259. vmovups %xmm0, 16(%rdi)
  16260. vmovdqu 32(%rbp), %xmm0
  16261. vmovups %xmm0, 32(%rdi)
  16262. movq 48(%rbp), %rax
  16263. movq %rax, 48(%rdi)
  16264. L_end_3072_sqr_avx2_12:
  16265. addq $0x60, %rsp
  16266. popq %rbx
  16267. popq %r15
  16268. popq %r14
  16269. popq %r13
  16270. popq %r12
  16271. popq %rbp
  16272. repz retq
  16273. #ifndef __APPLE__
  16274. .size sp_3072_sqr_avx2_12,.-sp_3072_sqr_avx2_12
  16275. #endif /* __APPLE__ */
  16276. #endif /* HAVE_INTEL_AVX2 */
  /* Add b to a into r. (r = a + b)
   *
   * r  Result, 12 limbs of 64 bits, least significant limb at offset 0.
   * a  First operand, 12 limbs.
   * b  Second operand, 12 limbs.
   *
   * In:      %rdi = r, %rsi = a, %rdx = b
   * Out:     %rax = carry out of the most significant limb (0 or 1)
   * Scratch: %rcx, %r8, flags
   *
   * The carry flag is live from the first addq to the final adcq, so only
   * flag-preserving instructions (movq) may sit between the adcq steps.
   * Each limb of a is loaded before the previous result limb is stored.
   */
  #ifndef __APPLE__
  .text
  .globl  sp_3072_add_12
  .type   sp_3072_add_12,@function
  .align  16
  sp_3072_add_12:
  #else
  .section        __TEXT,__text
  .globl  _sp_3072_add_12
  .p2align        4
  _sp_3072_add_12:
  #endif /* __APPLE__ */
          /* Limb 0 opens the carry chain; %rax is cleared to collect the carry out. */
          movq    (%rsi), %rcx
          xorl    %eax, %eax
          addq    (%rdx), %rcx
          /* Limbs 1-3 */
          movq    8(%rsi), %r8
          movq    %rcx, (%rdi)
          adcq    8(%rdx), %r8
          movq    16(%rsi), %rcx
          movq    %r8, 8(%rdi)
          adcq    16(%rdx), %rcx
          movq    24(%rsi), %r8
          movq    %rcx, 16(%rdi)
          adcq    24(%rdx), %r8
          /* Limbs 4-7 */
          movq    32(%rsi), %rcx
          movq    %r8, 24(%rdi)
          adcq    32(%rdx), %rcx
          movq    40(%rsi), %r8
          movq    %rcx, 32(%rdi)
          adcq    40(%rdx), %r8
          movq    48(%rsi), %rcx
          movq    %r8, 40(%rdi)
          adcq    48(%rdx), %rcx
          movq    56(%rsi), %r8
          movq    %rcx, 48(%rdi)
          adcq    56(%rdx), %r8
          /* Limbs 8-11 */
          movq    64(%rsi), %rcx
          movq    %r8, 56(%rdi)
          adcq    64(%rdx), %rcx
          movq    72(%rsi), %r8
          movq    %rcx, 64(%rdi)
          adcq    72(%rdx), %r8
          movq    80(%rsi), %rcx
          movq    %r8, 72(%rdi)
          adcq    80(%rdx), %rcx
          movq    88(%rsi), %r8
          movq    %rcx, 80(%rdi)
          adcq    88(%rdx), %r8
          movq    %r8, 88(%rdi)
          /* Fold the final carry into the (zero) return register. */
          adcq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size   sp_3072_add_12,.-sp_3072_add_12
  #endif /* __APPLE__ */
  /* Sub b from a into a. (a -= b)
   *
   * a  Minuend and result, 24 limbs of 64 bits, least significant limb at
   *    offset 0.
   * b  Subtrahend, 24 limbs.
   *
   * In:      %rdi = a, %rsi = b
   * Out:     %rax = 0 when no borrow left the top limb, all ones otherwise
   * Scratch: %rdx, %rcx, flags
   *
   * The borrow (carry flag) is live from the first subq to the final sbbq,
   * so only flag-preserving instructions (movq) sit between the sbbq steps.
   */
  #ifndef __APPLE__
  .text
  .globl  sp_3072_sub_in_place_24
  .type   sp_3072_sub_in_place_24,@function
  .align  16
  sp_3072_sub_in_place_24:
  #else
  .section        __TEXT,__text
  .globl  _sp_3072_sub_in_place_24
  .p2align        4
  _sp_3072_sub_in_place_24:
  #endif /* __APPLE__ */
          /* Limb 0 opens the borrow chain; %rax is cleared to collect the borrow. */
          movq    (%rdi), %rdx
          xorl    %eax, %eax
          subq    (%rsi), %rdx
          /* Limbs 1-3 */
          movq    8(%rdi), %rcx
          movq    %rdx, (%rdi)
          sbbq    8(%rsi), %rcx
          movq    16(%rdi), %rdx
          movq    %rcx, 8(%rdi)
          sbbq    16(%rsi), %rdx
          movq    24(%rdi), %rcx
          movq    %rdx, 16(%rdi)
          sbbq    24(%rsi), %rcx
          /* Limbs 4-7 */
          movq    32(%rdi), %rdx
          movq    %rcx, 24(%rdi)
          sbbq    32(%rsi), %rdx
          movq    40(%rdi), %rcx
          movq    %rdx, 32(%rdi)
          sbbq    40(%rsi), %rcx
          movq    48(%rdi), %rdx
          movq    %rcx, 40(%rdi)
          sbbq    48(%rsi), %rdx
          movq    56(%rdi), %rcx
          movq    %rdx, 48(%rdi)
          sbbq    56(%rsi), %rcx
          /* Limbs 8-11 */
          movq    64(%rdi), %rdx
          movq    %rcx, 56(%rdi)
          sbbq    64(%rsi), %rdx
          movq    72(%rdi), %rcx
          movq    %rdx, 64(%rdi)
          sbbq    72(%rsi), %rcx
          movq    80(%rdi), %rdx
          movq    %rcx, 72(%rdi)
          sbbq    80(%rsi), %rdx
          movq    88(%rdi), %rcx
          movq    %rdx, 80(%rdi)
          sbbq    88(%rsi), %rcx
          /* Limbs 12-15 */
          movq    96(%rdi), %rdx
          movq    %rcx, 88(%rdi)
          sbbq    96(%rsi), %rdx
          movq    104(%rdi), %rcx
          movq    %rdx, 96(%rdi)
          sbbq    104(%rsi), %rcx
          movq    112(%rdi), %rdx
          movq    %rcx, 104(%rdi)
          sbbq    112(%rsi), %rdx
          movq    120(%rdi), %rcx
          movq    %rdx, 112(%rdi)
          sbbq    120(%rsi), %rcx
          /* Limbs 16-19 */
          movq    128(%rdi), %rdx
          movq    %rcx, 120(%rdi)
          sbbq    128(%rsi), %rdx
          movq    136(%rdi), %rcx
          movq    %rdx, 128(%rdi)
          sbbq    136(%rsi), %rcx
          movq    144(%rdi), %rdx
          movq    %rcx, 136(%rdi)
          sbbq    144(%rsi), %rdx
          movq    152(%rdi), %rcx
          movq    %rdx, 144(%rdi)
          sbbq    152(%rsi), %rcx
          /* Limbs 20-23 */
          movq    160(%rdi), %rdx
          movq    %rcx, 152(%rdi)
          sbbq    160(%rsi), %rdx
          movq    168(%rdi), %rcx
          movq    %rdx, 160(%rdi)
          sbbq    168(%rsi), %rcx
          movq    176(%rdi), %rdx
          movq    %rcx, 168(%rdi)
          sbbq    176(%rsi), %rdx
          movq    184(%rdi), %rcx
          movq    %rdx, 176(%rdi)
          sbbq    184(%rsi), %rcx
          movq    %rcx, 184(%rdi)
          /* 0 - 0 - borrow: leaves 0 or all ones in the return register. */
          sbbq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size   sp_3072_sub_in_place_24,.-sp_3072_sub_in_place_24
  #endif /* __APPLE__ */
  /* Add b to a into r. (r = a + b)
   *
   * r  Result, 24 limbs of 64 bits, least significant limb at offset 0.
   * a  First operand, 24 limbs.
   * b  Second operand, 24 limbs.
   *
   * In:      %rdi = r, %rsi = a, %rdx = b
   * Out:     %rax = carry out of the most significant limb (0 or 1)
   * Scratch: %rcx, %r8, flags
   *
   * The carry flag is live from the first addq to the final adcq, so only
   * flag-preserving instructions (movq) may sit between the adcq steps.
   * Each limb of a is loaded before the previous result limb is stored.
   */
  #ifndef __APPLE__
  .text
  .globl  sp_3072_add_24
  .type   sp_3072_add_24,@function
  .align  16
  sp_3072_add_24:
  #else
  .section        __TEXT,__text
  .globl  _sp_3072_add_24
  .p2align        4
  _sp_3072_add_24:
  #endif /* __APPLE__ */
          /* Limb 0 opens the carry chain; %rax is cleared to collect the carry out. */
          movq    (%rsi), %rcx
          xorl    %eax, %eax
          addq    (%rdx), %rcx
          /* Limbs 1-3 */
          movq    8(%rsi), %r8
          movq    %rcx, (%rdi)
          adcq    8(%rdx), %r8
          movq    16(%rsi), %rcx
          movq    %r8, 8(%rdi)
          adcq    16(%rdx), %rcx
          movq    24(%rsi), %r8
          movq    %rcx, 16(%rdi)
          adcq    24(%rdx), %r8
          /* Limbs 4-7 */
          movq    32(%rsi), %rcx
          movq    %r8, 24(%rdi)
          adcq    32(%rdx), %rcx
          movq    40(%rsi), %r8
          movq    %rcx, 32(%rdi)
          adcq    40(%rdx), %r8
          movq    48(%rsi), %rcx
          movq    %r8, 40(%rdi)
          adcq    48(%rdx), %rcx
          movq    56(%rsi), %r8
          movq    %rcx, 48(%rdi)
          adcq    56(%rdx), %r8
          /* Limbs 8-11 */
          movq    64(%rsi), %rcx
          movq    %r8, 56(%rdi)
          adcq    64(%rdx), %rcx
          movq    72(%rsi), %r8
          movq    %rcx, 64(%rdi)
          adcq    72(%rdx), %r8
          movq    80(%rsi), %rcx
          movq    %r8, 72(%rdi)
          adcq    80(%rdx), %rcx
          movq    88(%rsi), %r8
          movq    %rcx, 80(%rdi)
          adcq    88(%rdx), %r8
          /* Limbs 12-15 */
          movq    96(%rsi), %rcx
          movq    %r8, 88(%rdi)
          adcq    96(%rdx), %rcx
          movq    104(%rsi), %r8
          movq    %rcx, 96(%rdi)
          adcq    104(%rdx), %r8
          movq    112(%rsi), %rcx
          movq    %r8, 104(%rdi)
          adcq    112(%rdx), %rcx
          movq    120(%rsi), %r8
          movq    %rcx, 112(%rdi)
          adcq    120(%rdx), %r8
          /* Limbs 16-19 */
          movq    128(%rsi), %rcx
          movq    %r8, 120(%rdi)
          adcq    128(%rdx), %rcx
          movq    136(%rsi), %r8
          movq    %rcx, 128(%rdi)
          adcq    136(%rdx), %r8
          movq    144(%rsi), %rcx
          movq    %r8, 136(%rdi)
          adcq    144(%rdx), %rcx
          movq    152(%rsi), %r8
          movq    %rcx, 144(%rdi)
          adcq    152(%rdx), %r8
          /* Limbs 20-23 */
          movq    160(%rsi), %rcx
          movq    %r8, 152(%rdi)
          adcq    160(%rdx), %rcx
          movq    168(%rsi), %r8
          movq    %rcx, 160(%rdi)
          adcq    168(%rdx), %r8
          movq    176(%rsi), %rcx
          movq    %r8, 168(%rdi)
          adcq    176(%rdx), %rcx
          movq    184(%rsi), %r8
          movq    %rcx, 176(%rdi)
          adcq    184(%rdx), %r8
          movq    %r8, 184(%rdi)
          /* Fold the final carry into the (zero) return register. */
          adcq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size   sp_3072_add_24,.-sp_3072_add_24
  #endif /* __APPLE__ */
  /* Multiply a and b into r. (r = a * b)
   *
   * r  Result, 48 limbs of 64 bits, least significant limb at offset 0.
   * a  First operand, 24 limbs.
   * b  Second operand, 24 limbs.
   *
   * In:      %rdi = r, %rsi = a, %rdx = b
   * Scratch: %rax, %rcx, %r8-%r11, flags (plus whatever sp_3072_mul_12 uses);
   *          %r12-%r15 are saved and restored.
   *
   * One level of Karatsuba on 12-limb halves (a = a1:a0, b = b1:b0), using
   * sp_3072_mul_12 (defined elsewhere in this file; treated here as a
   * 12 x 12 limb multiply that writes a 24-limb product):
   *
   *   z1 = (a0 + a1) * (b0 + b1)   low 24 limbs, carries handled separately
   *   z2 = a1 * b1
   *   z0 = a0 * b0                 written straight to r[0..23]
   *   r  = z0 + (z1 - z2 - z0) << (12 limbs) + z2 << (24 limbs)
   *
   * Stack frame (0x268 bytes):
   *     0(%rsp) .. 191   z1
   *   192(%rsp) .. 383   z2
   *   384(%rsp) .. 479   a0 + a1 (12 limbs)
   *   480(%rsp) .. 575   b0 + b1 (12 limbs)
   *   576(%rsp)          saved r
   *   584(%rsp)          saved a
   *   592(%rsp)          saved b
   *   600(%rsp)          carry out of a0 + a1 (0 or 1)
   *   608(%rsp)          carry out of b0 + b1 (0 or 1)
   *
   * Four pushes plus the 0x268-byte frame leave %rsp 16-byte aligned at
   * every callq.
   */
  #ifndef __APPLE__
  .text
  .globl  sp_3072_mul_24
  .type   sp_3072_mul_24,@function
  .align  16
  sp_3072_mul_24:
  #else
  .section        __TEXT,__text
  .globl  _sp_3072_mul_24
  .p2align        4
  _sp_3072_mul_24:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          subq    $0x268, %rsp
          /* Spill the three arguments; they are needed again after each call. */
          movq    %rdi, 576(%rsp)
          movq    %rsi, 584(%rsp)
          movq    %rdx, 592(%rsp)
          /* sumA = a0 + a1 -> 384(%rsp); carry out -> 600(%rsp). */
          leaq    384(%rsp), %r10
          leaq    96(%rsi), %r12
          # Add
          movq    (%rsi), %rax
          xorq    %r13, %r13
          addq    (%r12), %rax
          movq    8(%rsi), %rcx
          movq    %rax, (%r10)
          adcq    8(%r12), %rcx
          movq    16(%rsi), %r8
          movq    %rcx, 8(%r10)
          adcq    16(%r12), %r8
          movq    24(%rsi), %rax
          movq    %r8, 16(%r10)
          adcq    24(%r12), %rax
          movq    32(%rsi), %rcx
          movq    %rax, 24(%r10)
          adcq    32(%r12), %rcx
          movq    40(%rsi), %r8
          movq    %rcx, 32(%r10)
          adcq    40(%r12), %r8
          movq    48(%rsi), %rax
          movq    %r8, 40(%r10)
          adcq    48(%r12), %rax
          movq    56(%rsi), %rcx
          movq    %rax, 48(%r10)
          adcq    56(%r12), %rcx
          movq    64(%rsi), %r8
          movq    %rcx, 56(%r10)
          adcq    64(%r12), %r8
          movq    72(%rsi), %rax
          movq    %r8, 64(%r10)
          adcq    72(%r12), %rax
          movq    80(%rsi), %rcx
          movq    %rax, 72(%r10)
          adcq    80(%r12), %rcx
          movq    88(%rsi), %r8
          movq    %rcx, 80(%r10)
          adcq    88(%r12), %r8
          movq    %r8, 88(%r10)
          adcq    $0x00, %r13
          movq    %r13, 600(%rsp)
          /* sumB = b0 + b1 -> 480(%rsp); carry out -> 608(%rsp). */
          leaq    480(%rsp), %r11
          leaq    96(%rdx), %r12
          # Add
          movq    (%rdx), %rax
          xorq    %r14, %r14
          addq    (%r12), %rax
          movq    8(%rdx), %rcx
          movq    %rax, (%r11)
          adcq    8(%r12), %rcx
          movq    16(%rdx), %r8
          movq    %rcx, 8(%r11)
          adcq    16(%r12), %r8
          movq    24(%rdx), %rax
          movq    %r8, 16(%r11)
          adcq    24(%r12), %rax
          movq    32(%rdx), %rcx
          movq    %rax, 24(%r11)
          adcq    32(%r12), %rcx
          movq    40(%rdx), %r8
          movq    %rcx, 32(%r11)
          adcq    40(%r12), %r8
          movq    48(%rdx), %rax
          movq    %r8, 40(%r11)
          adcq    48(%r12), %rax
          movq    56(%rdx), %rcx
          movq    %rax, 48(%r11)
          adcq    56(%r12), %rcx
          movq    64(%rdx), %r8
          movq    %rcx, 56(%r11)
          adcq    64(%r12), %r8
          movq    72(%rdx), %rax
          movq    %r8, 64(%r11)
          adcq    72(%r12), %rax
          movq    80(%rdx), %rcx
          movq    %rax, 72(%r11)
          adcq    80(%r12), %rcx
          movq    88(%rdx), %r8
          movq    %rcx, 80(%r11)
          adcq    88(%r12), %r8
          movq    %r8, 88(%r11)
          adcq    $0x00, %r14
          movq    %r14, 608(%rsp)
          /* z1 = sumA * sumB -> 0(%rsp) */
          movq    %r11, %rdx
          movq    %r10, %rsi
          movq    %rsp, %rdi
  #ifndef __APPLE__
          callq   sp_3072_mul_12@plt
  #else
          callq   _sp_3072_mul_12
  #endif /* __APPLE__ */
          /* z2 = a1 * b1 -> 192(%rsp) */
          movq    592(%rsp), %rdx
          movq    584(%rsp), %rsi
          leaq    192(%rsp), %rdi
          addq    $0x60, %rdx
          addq    $0x60, %rsi
  #ifndef __APPLE__
          callq   sp_3072_mul_12@plt
  #else
          callq   _sp_3072_mul_12
  #endif /* __APPLE__ */
          /* z0 = a0 * b0 -> r[0..23] */
          movq    592(%rsp), %rdx
          movq    584(%rsp), %rsi
          movq    576(%rsp), %rdi
  #ifndef __APPLE__
          callq   sp_3072_mul_12@plt
  #else
          callq   _sp_3072_mul_12
  #endif /* __APPLE__ */
          /* %rdi is caller-saved under the System V ABI: reload r from its
           * spill slot instead of assuming the callee left it untouched. */
          movq    576(%rsp), %rdi
          movq    600(%rsp), %r13
          movq    608(%rsp), %r14
          movq    576(%rsp), %r15
          movq    %r13, %r9
          leaq    384(%rsp), %r10
          leaq    480(%rsp), %r11
          /* %r9 = carryA & carryB; %r13 / %r14 become all-ones masks when the
           * matching carry was set. %r15 = r + 24 limbs. */
          andq    %r14, %r9
          negq    %r13
          negq    %r14
          addq    $0xc0, %r15
          /* Branch-free carry correction: keep sumA only if carryB was set and
           * sumB only if carryA was set (masked in place). */
          movq    (%r10), %rax
          movq    (%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, (%r10)
          movq    %rcx, (%r11)
          movq    8(%r10), %rax
          movq    8(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 8(%r10)
          movq    %rcx, 8(%r11)
          movq    16(%r10), %rax
          movq    16(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 16(%r10)
          movq    %rcx, 16(%r11)
          movq    24(%r10), %rax
          movq    24(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 24(%r10)
          movq    %rcx, 24(%r11)
          movq    32(%r10), %rax
          movq    32(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 32(%r10)
          movq    %rcx, 32(%r11)
          movq    40(%r10), %rax
          movq    40(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 40(%r10)
          movq    %rcx, 40(%r11)
          movq    48(%r10), %rax
          movq    48(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 48(%r10)
          movq    %rcx, 48(%r11)
          movq    56(%r10), %rax
          movq    56(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 56(%r10)
          movq    %rcx, 56(%r11)
          movq    64(%r10), %rax
          movq    64(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 64(%r10)
          movq    %rcx, 64(%r11)
          movq    72(%r10), %rax
          movq    72(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 72(%r10)
          movq    %rcx, 72(%r11)
          movq    80(%r10), %rax
          movq    80(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 80(%r10)
          movq    %rcx, 80(%r11)
          movq    88(%r10), %rax
          movq    88(%r11), %rcx
          andq    %r14, %rax
          andq    %r13, %rcx
          movq    %rax, 88(%r10)
          movq    %rcx, 88(%r11)
          /* r[24..35] = masked sumA + masked sumB; carry accumulates in %r9. */
          movq    (%r10), %rax
          addq    (%r11), %rax
          movq    8(%r10), %rcx
          movq    %rax, (%r15)
          adcq    8(%r11), %rcx
          movq    16(%r10), %r8
          movq    %rcx, 8(%r15)
          adcq    16(%r11), %r8
          movq    24(%r10), %rax
          movq    %r8, 16(%r15)
          adcq    24(%r11), %rax
          movq    32(%r10), %rcx
          movq    %rax, 24(%r15)
          adcq    32(%r11), %rcx
          movq    40(%r10), %r8
          movq    %rcx, 32(%r15)
          adcq    40(%r11), %r8
          movq    48(%r10), %rax
          movq    %r8, 40(%r15)
          adcq    48(%r11), %rax
          movq    56(%r10), %rcx
          movq    %rax, 48(%r15)
          adcq    56(%r11), %rcx
          movq    64(%r10), %r8
          movq    %rcx, 56(%r15)
          adcq    64(%r11), %r8
          movq    72(%r10), %rax
          movq    %r8, 64(%r15)
          adcq    72(%r11), %rax
          movq    80(%r10), %rcx
          movq    %rax, 72(%r15)
          adcq    80(%r11), %rcx
          movq    88(%r10), %r8
          movq    %rcx, 80(%r15)
          adcq    88(%r11), %r8
          movq    %r8, 88(%r15)
          adcq    $0x00, %r9
          /* z1 -= z2 (24 limbs, in place at 0(%rsp)); borrow comes off %r9. */
          leaq    192(%rsp), %r11
          movq    %rsp, %r10
          movq    (%r10), %rax
          subq    (%r11), %rax
          movq    8(%r10), %rcx
          movq    %rax, (%r10)
          sbbq    8(%r11), %rcx
          movq    16(%r10), %r8
          movq    %rcx, 8(%r10)
          sbbq    16(%r11), %r8
          movq    24(%r10), %rax
          movq    %r8, 16(%r10)
          sbbq    24(%r11), %rax
          movq    32(%r10), %rcx
          movq    %rax, 24(%r10)
          sbbq    32(%r11), %rcx
          movq    40(%r10), %r8
          movq    %rcx, 32(%r10)
          sbbq    40(%r11), %r8
          movq    48(%r10), %rax
          movq    %r8, 40(%r10)
          sbbq    48(%r11), %rax
          movq    56(%r10), %rcx
          movq    %rax, 48(%r10)
          sbbq    56(%r11), %rcx
          movq    64(%r10), %r8
          movq    %rcx, 56(%r10)
          sbbq    64(%r11), %r8
          movq    72(%r10), %rax
          movq    %r8, 64(%r10)
          sbbq    72(%r11), %rax
          movq    80(%r10), %rcx
          movq    %rax, 72(%r10)
          sbbq    80(%r11), %rcx
          movq    88(%r10), %r8
          movq    %rcx, 80(%r10)
          sbbq    88(%r11), %r8
          movq    96(%r10), %rax
          movq    %r8, 88(%r10)
          sbbq    96(%r11), %rax
          movq    104(%r10), %rcx
          movq    %rax, 96(%r10)
          sbbq    104(%r11), %rcx
          movq    112(%r10), %r8
          movq    %rcx, 104(%r10)
          sbbq    112(%r11), %r8
          movq    120(%r10), %rax
          movq    %r8, 112(%r10)
          sbbq    120(%r11), %rax
          movq    128(%r10), %rcx
          movq    %rax, 120(%r10)
          sbbq    128(%r11), %rcx
          movq    136(%r10), %r8
          movq    %rcx, 128(%r10)
          sbbq    136(%r11), %r8
          movq    144(%r10), %rax
          movq    %r8, 136(%r10)
          sbbq    144(%r11), %rax
          movq    152(%r10), %rcx
          movq    %rax, 144(%r10)
          sbbq    152(%r11), %rcx
          movq    160(%r10), %r8
          movq    %rcx, 152(%r10)
          sbbq    160(%r11), %r8
          movq    168(%r10), %rax
          movq    %r8, 160(%r10)
          sbbq    168(%r11), %rax
          movq    176(%r10), %rcx
          movq    %rax, 168(%r10)
          sbbq    176(%r11), %rcx
          movq    184(%r10), %r8
          movq    %rcx, 176(%r10)
          sbbq    184(%r11), %r8
          movq    %r8, 184(%r10)
          sbbq    $0x00, %r9
          /* z1 -= z0 (z0 is r[0..23], read through %rdi); borrow comes off %r9. */
          movq    (%r10), %rax
          subq    (%rdi), %rax
          movq    8(%r10), %rcx
          movq    %rax, (%r10)
          sbbq    8(%rdi), %rcx
          movq    16(%r10), %r8
          movq    %rcx, 8(%r10)
          sbbq    16(%rdi), %r8
          movq    24(%r10), %rax
          movq    %r8, 16(%r10)
          sbbq    24(%rdi), %rax
          movq    32(%r10), %rcx
          movq    %rax, 24(%r10)
          sbbq    32(%rdi), %rcx
          movq    40(%r10), %r8
          movq    %rcx, 32(%r10)
          sbbq    40(%rdi), %r8
          movq    48(%r10), %rax
          movq    %r8, 40(%r10)
          sbbq    48(%rdi), %rax
          movq    56(%r10), %rcx
          movq    %rax, 48(%r10)
          sbbq    56(%rdi), %rcx
          movq    64(%r10), %r8
          movq    %rcx, 56(%r10)
          sbbq    64(%rdi), %r8
          movq    72(%r10), %rax
          movq    %r8, 64(%r10)
          sbbq    72(%rdi), %rax
          movq    80(%r10), %rcx
          movq    %rax, 72(%r10)
          sbbq    80(%rdi), %rcx
          movq    88(%r10), %r8
          movq    %rcx, 80(%r10)
          sbbq    88(%rdi), %r8
          movq    96(%r10), %rax
          movq    %r8, 88(%r10)
          sbbq    96(%rdi), %rax
          movq    104(%r10), %rcx
          movq    %rax, 96(%r10)
          sbbq    104(%rdi), %rcx
          movq    112(%r10), %r8
          movq    %rcx, 104(%r10)
          sbbq    112(%rdi), %r8
          movq    120(%r10), %rax
          movq    %r8, 112(%r10)
          sbbq    120(%rdi), %rax
          movq    128(%r10), %rcx
          movq    %rax, 120(%r10)
          sbbq    128(%rdi), %rcx
          movq    136(%r10), %r8
          movq    %rcx, 128(%r10)
          sbbq    136(%rdi), %r8
          movq    144(%r10), %rax
          movq    %r8, 136(%r10)
          sbbq    144(%rdi), %rax
          movq    152(%r10), %rcx
          movq    %rax, 144(%r10)
          sbbq    152(%rdi), %rcx
          movq    160(%r10), %r8
          movq    %rcx, 152(%r10)
          sbbq    160(%rdi), %r8
          movq    168(%r10), %rax
          movq    %r8, 160(%r10)
          sbbq    168(%rdi), %rax
          movq    176(%r10), %rcx
          movq    %rax, 168(%r10)
          sbbq    176(%rdi), %rcx
          movq    184(%r10), %r8
          movq    %rcx, 176(%r10)
          sbbq    184(%rdi), %r8
          movq    %r8, 184(%r10)
          sbbq    $0x00, %r9
          /* r[12..35] += z1; the net carry in %r9 becomes r[36]. */
          subq    $0x60, %r15
          # Add
          movq    (%r15), %rax
          addq    (%r10), %rax
          movq    8(%r15), %rcx
          movq    %rax, (%r15)
          adcq    8(%r10), %rcx
          movq    16(%r15), %r8
          movq    %rcx, 8(%r15)
          adcq    16(%r10), %r8
          movq    24(%r15), %rax
          movq    %r8, 16(%r15)
          adcq    24(%r10), %rax
          movq    32(%r15), %rcx
          movq    %rax, 24(%r15)
          adcq    32(%r10), %rcx
          movq    40(%r15), %r8
          movq    %rcx, 32(%r15)
          adcq    40(%r10), %r8
          movq    48(%r15), %rax
          movq    %r8, 40(%r15)
          adcq    48(%r10), %rax
          movq    56(%r15), %rcx
          movq    %rax, 48(%r15)
          adcq    56(%r10), %rcx
          movq    64(%r15), %r8
          movq    %rcx, 56(%r15)
          adcq    64(%r10), %r8
          movq    72(%r15), %rax
          movq    %r8, 64(%r15)
          adcq    72(%r10), %rax
          movq    80(%r15), %rcx
          movq    %rax, 72(%r15)
          adcq    80(%r10), %rcx
          movq    88(%r15), %r8
          movq    %rcx, 80(%r15)
          adcq    88(%r10), %r8
          movq    96(%r15), %rax
          movq    %r8, 88(%r15)
          adcq    96(%r10), %rax
          movq    104(%r15), %rcx
          movq    %rax, 96(%r15)
          adcq    104(%r10), %rcx
          movq    112(%r15), %r8
          movq    %rcx, 104(%r15)
          adcq    112(%r10), %r8
          movq    120(%r15), %rax
          movq    %r8, 112(%r15)
          adcq    120(%r10), %rax
          movq    128(%r15), %rcx
          movq    %rax, 120(%r15)
          adcq    128(%r10), %rcx
          movq    136(%r15), %r8
          movq    %rcx, 128(%r15)
          adcq    136(%r10), %r8
          movq    144(%r15), %rax
          movq    %r8, 136(%r15)
          adcq    144(%r10), %rax
          movq    152(%r15), %rcx
          movq    %rax, 144(%r15)
          adcq    152(%r10), %rcx
          movq    160(%r15), %r8
          movq    %rcx, 152(%r15)
          adcq    160(%r10), %r8
          movq    168(%r15), %rax
          movq    %r8, 160(%r15)
          adcq    168(%r10), %rax
          movq    176(%r15), %rcx
          movq    %rax, 168(%r15)
          adcq    176(%r10), %rcx
          movq    184(%r15), %r8
          movq    %rcx, 176(%r15)
          adcq    184(%r10), %r8
          movq    %r8, 184(%r15)
          adcq    $0x00, %r9
          movq    %r9, 288(%rdi)
          /* r[24..36] += z2[0..12] */
          addq    $0x60, %r15
          # Add
          movq    (%r15), %rax
          addq    (%r11), %rax
          movq    8(%r15), %rcx
          movq    %rax, (%r15)
          adcq    8(%r11), %rcx
          movq    16(%r15), %r8
          movq    %rcx, 8(%r15)
          adcq    16(%r11), %r8
          movq    24(%r15), %rax
          movq    %r8, 16(%r15)
          adcq    24(%r11), %rax
          movq    32(%r15), %rcx
          movq    %rax, 24(%r15)
          adcq    32(%r11), %rcx
          movq    40(%r15), %r8
          movq    %rcx, 32(%r15)
          adcq    40(%r11), %r8
          movq    48(%r15), %rax
          movq    %r8, 40(%r15)
          adcq    48(%r11), %rax
          movq    56(%r15), %rcx
          movq    %rax, 48(%r15)
          adcq    56(%r11), %rcx
          movq    64(%r15), %r8
          movq    %rcx, 56(%r15)
          adcq    64(%r11), %r8
          movq    72(%r15), %rax
          movq    %r8, 64(%r15)
          adcq    72(%r11), %rax
          movq    80(%r15), %rcx
          movq    %rax, 72(%r15)
          adcq    80(%r11), %rcx
          movq    88(%r15), %r8
          movq    %rcx, 80(%r15)
          adcq    88(%r11), %r8
          movq    96(%r15), %rax
          movq    %r8, 88(%r15)
          adcq    96(%r11), %rax
          movq    %rax, 96(%r15)
          /* r[37..47] have not been written yet: copy z2[13..23] there while
           * propagating the carry. */
          # Add to zero
          movq    104(%r11), %rax
          adcq    $0x00, %rax
          movq    112(%r11), %rcx
          movq    %rax, 104(%r15)
          adcq    $0x00, %rcx
          movq    120(%r11), %r8
          movq    %rcx, 112(%r15)
          adcq    $0x00, %r8
          movq    128(%r11), %rax
          movq    %r8, 120(%r15)
          adcq    $0x00, %rax
          movq    136(%r11), %rcx
          movq    %rax, 128(%r15)
          adcq    $0x00, %rcx
          movq    144(%r11), %r8
          movq    %rcx, 136(%r15)
          adcq    $0x00, %r8
          movq    152(%r11), %rax
          movq    %r8, 144(%r15)
          adcq    $0x00, %rax
          movq    160(%r11), %rcx
          movq    %rax, 152(%r15)
          adcq    $0x00, %rcx
          movq    168(%r11), %r8
          movq    %rcx, 160(%r15)
          adcq    $0x00, %r8
          movq    176(%r11), %rax
          movq    %r8, 168(%r15)
          adcq    $0x00, %rax
          movq    184(%r11), %rcx
          movq    %rax, 176(%r15)
          adcq    $0x00, %rcx
          movq    %rcx, 184(%r15)
          addq    $0x268, %rsp
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size   sp_3072_mul_24,.-sp_3072_mul_24
  #endif /* __APPLE__ */
  /* Add a to a into r. (r = a + a)
   *
   * r  Result, 12 limbs of 64 bits, least significant limb at offset 0.
   * a  Operand to double, 12 limbs.
   *
   * In:      %rdi = r, %rsi = a
   * Out:     %rax = bit shifted out of the most significant limb (0 or 1)
   * Scratch: %rdx, %rcx, flags
   *
   * Each limb is added to itself with the carry of the limb below it, so
   * the carry flag is live from the first addq to the final adcq.
   */
  #ifndef __APPLE__
  .text
  .globl  sp_3072_dbl_12
  .type   sp_3072_dbl_12,@function
  .align  16
  sp_3072_dbl_12:
  #else
  .section        __TEXT,__text
  .globl  _sp_3072_dbl_12
  .p2align        4
  _sp_3072_dbl_12:
  #endif /* __APPLE__ */
          /* Limb 0 opens the carry chain; %rax is cleared to collect the carry out. */
          movq    (%rsi), %rdx
          xorl    %eax, %eax
          addq    %rdx, %rdx
          /* Limbs 1-3 */
          movq    8(%rsi), %rcx
          movq    %rdx, (%rdi)
          adcq    %rcx, %rcx
          movq    16(%rsi), %rdx
          movq    %rcx, 8(%rdi)
          adcq    %rdx, %rdx
          movq    24(%rsi), %rcx
          movq    %rdx, 16(%rdi)
          adcq    %rcx, %rcx
          /* Limbs 4-7 */
          movq    32(%rsi), %rdx
          movq    %rcx, 24(%rdi)
          adcq    %rdx, %rdx
          movq    40(%rsi), %rcx
          movq    %rdx, 32(%rdi)
          adcq    %rcx, %rcx
          movq    48(%rsi), %rdx
          movq    %rcx, 40(%rdi)
          adcq    %rdx, %rdx
          movq    56(%rsi), %rcx
          movq    %rdx, 48(%rdi)
          adcq    %rcx, %rcx
          /* Limbs 8-11 */
          movq    64(%rsi), %rdx
          movq    %rcx, 56(%rdi)
          adcq    %rdx, %rdx
          movq    72(%rsi), %rcx
          movq    %rdx, 64(%rdi)
          adcq    %rcx, %rcx
          movq    80(%rsi), %rdx
          movq    %rcx, 72(%rdi)
          adcq    %rdx, %rdx
          movq    88(%rsi), %rcx
          movq    %rdx, 80(%rdi)
          adcq    %rcx, %rcx
          movq    %rcx, 88(%rdi)
          /* Fold the final carry into the (zero) return register. */
          adcq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size   sp_3072_dbl_12,.-sp_3072_dbl_12
  #endif /* __APPLE__ */
  /* Square a and put result in r. (r = a * a)
   *
   * r  A single precision integer. 48 64-bit words are written via %rdi.
   * a  A single precision integer. 24 64-bit words are read via %rsi.
   *
   * One Karatsuba step over 12-word halves, a = a0 + a1 * 2^768:
   *   r = a0^2 + ((a0 + a1)^2 - a0^2 - a1^2) * 2^768 + a1^2 * 2^1536
   * The three half-size squares are computed by sp_3072_sqr_12.
   *
   * a0 + a1 can overflow 12 words. With s the low 12 words of the sum and
   * c the carry (0 or 1):
   *   (s + c * 2^768)^2 = s^2 + 2 * (c * s) * 2^768 + c * 2^1536
   * so only s is squared, 2 * (s AND -c) is added one half higher, and c is
   * folded into the running top word kept in %rcx.
   *
   * Stack frame (0x1f8 bytes; with the return address rsp is 16-byte
   * aligned at every call):
   *     0(%rsp) .. 191   s^2, later the middle term
   *   192(%rsp) .. 383   a1^2
   *   384(%rsp) .. 479   s = low 12 words of a0 + a1
   *   480(%rsp)          saved r
   *   488(%rsp)          saved a
   *   496(%rsp)          c = carry out of a0 + a1
   */
  #ifndef __APPLE__
  .text
  .globl sp_3072_sqr_24
  .type sp_3072_sqr_24,@function
  .align 16
  sp_3072_sqr_24:
  #else
  .section __TEXT,__text
  .globl _sp_3072_sqr_24
  .p2align 4
  _sp_3072_sqr_24:
  #endif /* __APPLE__ */
          subq $0x1f8, %rsp
          movq %rdi, 480(%rsp)
          movq %rsi, 488(%rsp)
          /* r8 = &s (frame), r9 = &a1 */
          leaq 384(%rsp), %r8
          leaq 96(%rsi), %r9
          # Add
          /* s = a0 + a1, carry collected in rcx */
          movq (%rsi), %rdx
          xorq %rcx, %rcx
          addq (%r9), %rdx
          movq 8(%rsi), %rax
          movq %rdx, (%r8)
          adcq 8(%r9), %rax
          movq 16(%rsi), %rdx
          movq %rax, 8(%r8)
          adcq 16(%r9), %rdx
          movq 24(%rsi), %rax
          movq %rdx, 16(%r8)
          adcq 24(%r9), %rax
          movq 32(%rsi), %rdx
          movq %rax, 24(%r8)
          adcq 32(%r9), %rdx
          movq 40(%rsi), %rax
          movq %rdx, 32(%r8)
          adcq 40(%r9), %rax
          movq 48(%rsi), %rdx
          movq %rax, 40(%r8)
          adcq 48(%r9), %rdx
          movq 56(%rsi), %rax
          movq %rdx, 48(%r8)
          adcq 56(%r9), %rax
          movq 64(%rsi), %rdx
          movq %rax, 56(%r8)
          adcq 64(%r9), %rdx
          movq 72(%rsi), %rax
          movq %rdx, 64(%r8)
          adcq 72(%r9), %rax
          movq 80(%rsi), %rdx
          movq %rax, 72(%r8)
          adcq 80(%r9), %rdx
          movq 88(%rsi), %rax
          movq %rdx, 80(%r8)
          adcq 88(%r9), %rax
          movq %rax, 88(%r8)
          adcq $0x00, %rcx
          movq %rcx, 496(%rsp)
          /* frame[0..191] = s^2 */
          movq %r8, %rsi
          movq %rsp, %rdi
  #ifndef __APPLE__
          callq sp_3072_sqr_12@plt
  #else
          callq _sp_3072_sqr_12
  #endif /* __APPLE__ */
          /* frame[192..383] = a1^2 */
          movq 488(%rsp), %rsi
          leaq 192(%rsp), %rdi
          addq $0x60, %rsi
  #ifndef __APPLE__
          callq sp_3072_sqr_12@plt
  #else
          callq _sp_3072_sqr_12
  #endif /* __APPLE__ */
          /* r[0..23] = a0^2 */
          movq 488(%rsp), %rsi
          movq 480(%rsp), %rdi
  #ifndef __APPLE__
          callq sp_3072_sqr_12@plt
  #else
          callq _sp_3072_sqr_12
  #endif /* __APPLE__ */
          /* rdi is caller-saved: reload r from the frame instead of
           * assuming the callee left it intact. */
          movq 480(%rsp), %rdi
          movq 496(%rsp), %r10
          movq %rdi, %r9
          leaq 384(%rsp), %r8
          /* rcx = c, r10 = -c (all ones when the sum overflowed, else 0) */
          movq %r10, %rcx
          negq %r10
          addq $0xc0, %r9
          /* r[24..35] = s AND -c */
          movq (%r8), %rdx
          movq 8(%r8), %rax
          andq %r10, %rdx
          andq %r10, %rax
          movq %rdx, (%r9)
          movq %rax, 8(%r9)
          movq 16(%r8), %rdx
          movq 24(%r8), %rax
          andq %r10, %rdx
          andq %r10, %rax
          movq %rdx, 16(%r9)
          movq %rax, 24(%r9)
          movq 32(%r8), %rdx
          movq 40(%r8), %rax
          andq %r10, %rdx
          andq %r10, %rax
          movq %rdx, 32(%r9)
          movq %rax, 40(%r9)
          movq 48(%r8), %rdx
          movq 56(%r8), %rax
          andq %r10, %rdx
          andq %r10, %rax
          movq %rdx, 48(%r9)
          movq %rax, 56(%r9)
          movq 64(%r8), %rdx
          movq 72(%r8), %rax
          andq %r10, %rdx
          andq %r10, %rax
          movq %rdx, 64(%r9)
          movq %rax, 72(%r9)
          movq 80(%r8), %rdx
          movq 88(%r8), %rax
          andq %r10, %rdx
          andq %r10, %rax
          movq %rdx, 80(%r9)
          movq %rax, 88(%r9)
          /* r[24..35] *= 2, carry out added to rcx */
          movq (%r9), %rdx
          addq %rdx, %rdx
          movq 8(%r9), %rax
          movq %rdx, (%r9)
          adcq %rax, %rax
          movq 16(%r9), %rdx
          movq %rax, 8(%r9)
          adcq %rdx, %rdx
          movq 24(%r9), %rax
          movq %rdx, 16(%r9)
          adcq %rax, %rax
          movq 32(%r9), %rdx
          movq %rax, 24(%r9)
          adcq %rdx, %rdx
          movq 40(%r9), %rax
          movq %rdx, 32(%r9)
          adcq %rax, %rax
          movq 48(%r9), %rdx
          movq %rax, 40(%r9)
          adcq %rdx, %rdx
          movq 56(%r9), %rax
          movq %rdx, 48(%r9)
          adcq %rax, %rax
          movq 64(%r9), %rdx
          movq %rax, 56(%r9)
          adcq %rdx, %rdx
          movq 72(%r9), %rax
          movq %rdx, 64(%r9)
          adcq %rax, %rax
          movq 80(%r9), %rdx
          movq %rax, 72(%r9)
          adcq %rdx, %rdx
          movq 88(%r9), %rax
          movq %rdx, 80(%r9)
          adcq %rax, %rax
          movq %rax, 88(%r9)
          adcq $0x00, %rcx
          /* rsi = &a1^2, r8 = &s^2 */
          leaq 192(%rsp), %rsi
          movq %rsp, %r8
          /* frame[0..191] -= a1^2, borrow taken from rcx */
          movq (%r8), %rdx
          subq (%rsi), %rdx
          movq 8(%r8), %rax
          movq %rdx, (%r8)
          sbbq 8(%rsi), %rax
          movq 16(%r8), %rdx
          movq %rax, 8(%r8)
          sbbq 16(%rsi), %rdx
          movq 24(%r8), %rax
          movq %rdx, 16(%r8)
          sbbq 24(%rsi), %rax
          movq 32(%r8), %rdx
          movq %rax, 24(%r8)
          sbbq 32(%rsi), %rdx
          movq 40(%r8), %rax
          movq %rdx, 32(%r8)
          sbbq 40(%rsi), %rax
          movq 48(%r8), %rdx
          movq %rax, 40(%r8)
          sbbq 48(%rsi), %rdx
          movq 56(%r8), %rax
          movq %rdx, 48(%r8)
          sbbq 56(%rsi), %rax
          movq 64(%r8), %rdx
          movq %rax, 56(%r8)
          sbbq 64(%rsi), %rdx
          movq 72(%r8), %rax
          movq %rdx, 64(%r8)
          sbbq 72(%rsi), %rax
          movq 80(%r8), %rdx
          movq %rax, 72(%r8)
          sbbq 80(%rsi), %rdx
          movq 88(%r8), %rax
          movq %rdx, 80(%r8)
          sbbq 88(%rsi), %rax
          movq 96(%r8), %rdx
          movq %rax, 88(%r8)
          sbbq 96(%rsi), %rdx
          movq 104(%r8), %rax
          movq %rdx, 96(%r8)
          sbbq 104(%rsi), %rax
          movq 112(%r8), %rdx
          movq %rax, 104(%r8)
          sbbq 112(%rsi), %rdx
          movq 120(%r8), %rax
          movq %rdx, 112(%r8)
          sbbq 120(%rsi), %rax
          movq 128(%r8), %rdx
          movq %rax, 120(%r8)
          sbbq 128(%rsi), %rdx
          movq 136(%r8), %rax
          movq %rdx, 128(%r8)
          sbbq 136(%rsi), %rax
          movq 144(%r8), %rdx
          movq %rax, 136(%r8)
          sbbq 144(%rsi), %rdx
          movq 152(%r8), %rax
          movq %rdx, 144(%r8)
          sbbq 152(%rsi), %rax
          movq 160(%r8), %rdx
          movq %rax, 152(%r8)
          sbbq 160(%rsi), %rdx
          movq 168(%r8), %rax
          movq %rdx, 160(%r8)
          sbbq 168(%rsi), %rax
          movq 176(%r8), %rdx
          movq %rax, 168(%r8)
          sbbq 176(%rsi), %rdx
          movq 184(%r8), %rax
          movq %rdx, 176(%r8)
          sbbq 184(%rsi), %rax
          movq %rax, 184(%r8)
          sbbq $0x00, %rcx
          /* frame[0..191] -= a0^2 (read from r[0..23]), borrow from rcx */
          movq (%r8), %rdx
          subq (%rdi), %rdx
          movq 8(%r8), %rax
          movq %rdx, (%r8)
          sbbq 8(%rdi), %rax
          movq 16(%r8), %rdx
          movq %rax, 8(%r8)
          sbbq 16(%rdi), %rdx
          movq 24(%r8), %rax
          movq %rdx, 16(%r8)
          sbbq 24(%rdi), %rax
          movq 32(%r8), %rdx
          movq %rax, 24(%r8)
          sbbq 32(%rdi), %rdx
          movq 40(%r8), %rax
          movq %rdx, 32(%r8)
          sbbq 40(%rdi), %rax
          movq 48(%r8), %rdx
          movq %rax, 40(%r8)
          sbbq 48(%rdi), %rdx
          movq 56(%r8), %rax
          movq %rdx, 48(%r8)
          sbbq 56(%rdi), %rax
          movq 64(%r8), %rdx
          movq %rax, 56(%r8)
          sbbq 64(%rdi), %rdx
          movq 72(%r8), %rax
          movq %rdx, 64(%r8)
          sbbq 72(%rdi), %rax
          movq 80(%r8), %rdx
          movq %rax, 72(%r8)
          sbbq 80(%rdi), %rdx
          movq 88(%r8), %rax
          movq %rdx, 80(%r8)
          sbbq 88(%rdi), %rax
          movq 96(%r8), %rdx
          movq %rax, 88(%r8)
          sbbq 96(%rdi), %rdx
          movq 104(%r8), %rax
          movq %rdx, 96(%r8)
          sbbq 104(%rdi), %rax
          movq 112(%r8), %rdx
          movq %rax, 104(%r8)
          sbbq 112(%rdi), %rdx
          movq 120(%r8), %rax
          movq %rdx, 112(%r8)
          sbbq 120(%rdi), %rax
          movq 128(%r8), %rdx
          movq %rax, 120(%r8)
          sbbq 128(%rdi), %rdx
          movq 136(%r8), %rax
          movq %rdx, 128(%r8)
          sbbq 136(%rdi), %rax
          movq 144(%r8), %rdx
          movq %rax, 136(%r8)
          sbbq 144(%rdi), %rdx
          movq 152(%r8), %rax
          movq %rdx, 144(%r8)
          sbbq 152(%rdi), %rax
          movq 160(%r8), %rdx
          movq %rax, 152(%r8)
          sbbq 160(%rdi), %rdx
          movq 168(%r8), %rax
          movq %rdx, 160(%r8)
          sbbq 168(%rdi), %rax
          movq 176(%r8), %rdx
          movq %rax, 168(%r8)
          sbbq 176(%rdi), %rdx
          movq 184(%r8), %rax
          movq %rdx, 176(%r8)
          sbbq 184(%rdi), %rax
          movq %rax, 184(%r8)
          sbbq $0x00, %rcx
          /* r9 = &r[12]: the middle term is added 96 bytes into r */
          subq $0x60, %r9
          # Add in place
          movq (%r9), %rdx
          addq (%r8), %rdx
          movq 8(%r9), %rax
          movq %rdx, (%r9)
          adcq 8(%r8), %rax
          movq 16(%r9), %rdx
          movq %rax, 8(%r9)
          adcq 16(%r8), %rdx
          movq 24(%r9), %rax
          movq %rdx, 16(%r9)
          adcq 24(%r8), %rax
          movq 32(%r9), %rdx
          movq %rax, 24(%r9)
          adcq 32(%r8), %rdx
          movq 40(%r9), %rax
          movq %rdx, 32(%r9)
          adcq 40(%r8), %rax
          movq 48(%r9), %rdx
          movq %rax, 40(%r9)
          adcq 48(%r8), %rdx
          movq 56(%r9), %rax
          movq %rdx, 48(%r9)
          adcq 56(%r8), %rax
          movq 64(%r9), %rdx
          movq %rax, 56(%r9)
          adcq 64(%r8), %rdx
          movq 72(%r9), %rax
          movq %rdx, 64(%r9)
          adcq 72(%r8), %rax
          movq 80(%r9), %rdx
          movq %rax, 72(%r9)
          adcq 80(%r8), %rdx
          movq 88(%r9), %rax
          movq %rdx, 80(%r9)
          adcq 88(%r8), %rax
          movq 96(%r9), %rdx
          movq %rax, 88(%r9)
          adcq 96(%r8), %rdx
          movq 104(%r9), %rax
          movq %rdx, 96(%r9)
          adcq 104(%r8), %rax
          movq 112(%r9), %rdx
          movq %rax, 104(%r9)
          adcq 112(%r8), %rdx
          movq 120(%r9), %rax
          movq %rdx, 112(%r9)
          adcq 120(%r8), %rax
          movq 128(%r9), %rdx
          movq %rax, 120(%r9)
          adcq 128(%r8), %rdx
          movq 136(%r9), %rax
          movq %rdx, 128(%r9)
          adcq 136(%r8), %rax
          movq 144(%r9), %rdx
          movq %rax, 136(%r9)
          adcq 144(%r8), %rdx
          movq 152(%r9), %rax
          movq %rdx, 144(%r9)
          adcq 152(%r8), %rax
          movq 160(%r9), %rdx
          movq %rax, 152(%r9)
          adcq 160(%r8), %rdx
          movq 168(%r9), %rax
          movq %rdx, 160(%r9)
          adcq 168(%r8), %rax
          movq 176(%r9), %rdx
          movq %rax, 168(%r9)
          adcq 176(%r8), %rdx
          movq 184(%r9), %rax
          movq %rdx, 176(%r9)
          adcq 184(%r8), %rax
          movq %rax, 184(%r9)
          adcq $0x00, %rcx
          /* r[36] = accumulated top word of the middle term */
          movq %rcx, 288(%rdi)
          # Add in place
          /* r[24..36] += low 13 words of a1^2 */
          movq 96(%r9), %rdx
          addq (%rsi), %rdx
          movq 104(%r9), %rax
          movq %rdx, 96(%r9)
          adcq 8(%rsi), %rax
          movq 112(%r9), %rdx
          movq %rax, 104(%r9)
          adcq 16(%rsi), %rdx
          movq 120(%r9), %rax
          movq %rdx, 112(%r9)
          adcq 24(%rsi), %rax
          movq 128(%r9), %rdx
          movq %rax, 120(%r9)
          adcq 32(%rsi), %rdx
          movq 136(%r9), %rax
          movq %rdx, 128(%r9)
          adcq 40(%rsi), %rax
          movq 144(%r9), %rdx
          movq %rax, 136(%r9)
          adcq 48(%rsi), %rdx
          movq 152(%r9), %rax
          movq %rdx, 144(%r9)
          adcq 56(%rsi), %rax
          movq 160(%r9), %rdx
          movq %rax, 152(%r9)
          adcq 64(%rsi), %rdx
          movq 168(%r9), %rax
          movq %rdx, 160(%r9)
          adcq 72(%rsi), %rax
          movq 176(%r9), %rdx
          movq %rax, 168(%r9)
          adcq 80(%rsi), %rdx
          movq 184(%r9), %rax
          movq %rdx, 176(%r9)
          adcq 88(%rsi), %rax
          movq 192(%r9), %rdx
          movq %rax, 184(%r9)
          adcq 96(%rsi), %rdx
          movq %rdx, 192(%r9)
          # Add to zero
          /* r[37..47] = remaining words of a1^2 plus the running carry */
          movq 104(%rsi), %rdx
          adcq $0x00, %rdx
          movq 112(%rsi), %rax
          movq %rdx, 200(%r9)
          adcq $0x00, %rax
          movq 120(%rsi), %rdx
          movq %rax, 208(%r9)
          adcq $0x00, %rdx
          movq 128(%rsi), %rax
          movq %rdx, 216(%r9)
          adcq $0x00, %rax
          movq 136(%rsi), %rdx
          movq %rax, 224(%r9)
          adcq $0x00, %rdx
          movq 144(%rsi), %rax
          movq %rdx, 232(%r9)
          adcq $0x00, %rax
          movq 152(%rsi), %rdx
          movq %rax, 240(%r9)
          adcq $0x00, %rdx
          movq 160(%rsi), %rax
          movq %rdx, 248(%r9)
          adcq $0x00, %rax
          movq 168(%rsi), %rdx
          movq %rax, 256(%r9)
          adcq $0x00, %rdx
          movq 176(%rsi), %rax
          movq %rdx, 264(%r9)
          adcq $0x00, %rax
          movq 184(%rsi), %rdx
          movq %rax, 272(%r9)
          adcq $0x00, %rdx
          movq %rdx, 280(%r9)
          addq $0x1f8, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_3072_sqr_24,.-sp_3072_sqr_24
  #endif /* __APPLE__ */
  /* Multiply a and b into r. (r = a * b)
   *
   * r  A single precision integer. 48 64-bit words are written via %rdi.
   * a  A single precision integer. 24 64-bit words are read via %rsi.
   * b  A single precision integer. 24 64-bit words are read via %rdx.
   *
   * One Karatsuba step over 12-word halves,
   * a = a0 + a1 * 2^768, b = b0 + b1 * 2^768:
   *   r = a0*b0 + ((a0 + a1)*(b0 + b1) - a0*b0 - a1*b1) * 2^768
   *       + a1*b1 * 2^1536
   * The three half-size products are computed by sp_3072_mul_avx2_12.
   *
   * Each half sum can overflow 12 words. With sa, sb the low 12 words and
   * ca, cb the carries (0 or 1):
   *   (sa + ca * 2^768) * (sb + cb * 2^768)
   *     = sa*sb + ((sa AND -cb) + (sb AND -ca)) * 2^768 + (ca AND cb) * 2^1536
   * The AND with an all-ones/zero mask is done with pextq (BMI2): an
   * all-ones mask returns the source unchanged, a zero mask returns 0.
   *
   * Stack frame (0x268 bytes below four pushed registers; with the return
   * address rsp is 16-byte aligned at every call):
   *     0(%rsp) .. 191   sa*sb, later the middle term
   *   192(%rsp) .. 383   a1*b1
   *   384(%rsp) .. 479   sa = low 12 words of a0 + a1
   *   480(%rsp) .. 575   sb = low 12 words of b0 + b1
   *   576(%rsp)          saved r
   *   584(%rsp)          saved a
   *   592(%rsp)          saved b
   *   600(%rsp)          ca
   *   608(%rsp)          cb
   */
  #ifndef __APPLE__
  .text
  .globl sp_3072_mul_avx2_24
  .type sp_3072_mul_avx2_24,@function
  .align 16
  sp_3072_mul_avx2_24:
  #else
  .section __TEXT,__text
  .globl _sp_3072_mul_avx2_24
  .p2align 4
  _sp_3072_mul_avx2_24:
  #endif /* __APPLE__ */
          pushq %r12
          pushq %r13
          pushq %r14
          pushq %r15
          subq $0x268, %rsp
          movq %rdi, 576(%rsp)
          movq %rsi, 584(%rsp)
          movq %rdx, 592(%rsp)
          /* r10 = &sa (frame), r12 = &a1 */
          leaq 384(%rsp), %r10
          leaq 96(%rsi), %r12
          # Add
          /* sa = a0 + a1, carry collected in r13 */
          movq (%rsi), %rax
          xorq %r13, %r13
          addq (%r12), %rax
          movq 8(%rsi), %rcx
          movq %rax, (%r10)
          adcq 8(%r12), %rcx
          movq 16(%rsi), %r8
          movq %rcx, 8(%r10)
          adcq 16(%r12), %r8
          movq 24(%rsi), %rax
          movq %r8, 16(%r10)
          adcq 24(%r12), %rax
          movq 32(%rsi), %rcx
          movq %rax, 24(%r10)
          adcq 32(%r12), %rcx
          movq 40(%rsi), %r8
          movq %rcx, 32(%r10)
          adcq 40(%r12), %r8
          movq 48(%rsi), %rax
          movq %r8, 40(%r10)
          adcq 48(%r12), %rax
          movq 56(%rsi), %rcx
          movq %rax, 48(%r10)
          adcq 56(%r12), %rcx
          movq 64(%rsi), %r8
          movq %rcx, 56(%r10)
          adcq 64(%r12), %r8
          movq 72(%rsi), %rax
          movq %r8, 64(%r10)
          adcq 72(%r12), %rax
          movq 80(%rsi), %rcx
          movq %rax, 72(%r10)
          adcq 80(%r12), %rcx
          movq 88(%rsi), %r8
          movq %rcx, 80(%r10)
          adcq 88(%r12), %r8
          movq %r8, 88(%r10)
          adcq $0x00, %r13
          movq %r13, 600(%rsp)
          /* r11 = &sb (frame), r12 = &b1 */
          leaq 480(%rsp), %r11
          leaq 96(%rdx), %r12
          # Add
          /* sb = b0 + b1, carry collected in r14 */
          movq (%rdx), %rax
          xorq %r14, %r14
          addq (%r12), %rax
          movq 8(%rdx), %rcx
          movq %rax, (%r11)
          adcq 8(%r12), %rcx
          movq 16(%rdx), %r8
          movq %rcx, 8(%r11)
          adcq 16(%r12), %r8
          movq 24(%rdx), %rax
          movq %r8, 16(%r11)
          adcq 24(%r12), %rax
          movq 32(%rdx), %rcx
          movq %rax, 24(%r11)
          adcq 32(%r12), %rcx
          movq 40(%rdx), %r8
          movq %rcx, 32(%r11)
          adcq 40(%r12), %r8
          movq 48(%rdx), %rax
          movq %r8, 40(%r11)
          adcq 48(%r12), %rax
          movq 56(%rdx), %rcx
          movq %rax, 48(%r11)
          adcq 56(%r12), %rcx
          movq 64(%rdx), %r8
          movq %rcx, 56(%r11)
          adcq 64(%r12), %r8
          movq 72(%rdx), %rax
          movq %r8, 64(%r11)
          adcq 72(%r12), %rax
          movq 80(%rdx), %rcx
          movq %rax, 72(%r11)
          adcq 80(%r12), %rcx
          movq 88(%rdx), %r8
          movq %rcx, 80(%r11)
          adcq 88(%r12), %r8
          movq %r8, 88(%r11)
          adcq $0x00, %r14
          movq %r14, 608(%rsp)
          /* frame[0..191] = sa * sb */
          movq %r11, %rdx
          movq %r10, %rsi
          movq %rsp, %rdi
  #ifndef __APPLE__
          callq sp_3072_mul_avx2_12@plt
  #else
          callq _sp_3072_mul_avx2_12
  #endif /* __APPLE__ */
          /* frame[192..383] = a1 * b1 */
          movq 592(%rsp), %rdx
          movq 584(%rsp), %rsi
          leaq 192(%rsp), %rdi
          addq $0x60, %rdx
          addq $0x60, %rsi
  #ifndef __APPLE__
          callq sp_3072_mul_avx2_12@plt
  #else
          callq _sp_3072_mul_avx2_12
  #endif /* __APPLE__ */
          /* r[0..23] = a0 * b0 */
          movq 592(%rsp), %rdx
          movq 584(%rsp), %rsi
          movq 576(%rsp), %rdi
  #ifndef __APPLE__
          callq sp_3072_mul_avx2_12@plt
  #else
          callq _sp_3072_mul_avx2_12
  #endif /* __APPLE__ */
          /* rdi is caller-saved: reload r from the frame instead of
           * assuming the callee left it intact. */
          movq 576(%rsp), %rdi
          movq 600(%rsp), %r13
          movq 608(%rsp), %r14
          movq 576(%rsp), %r15
          movq %r13, %r9
          leaq 384(%rsp), %r10
          leaq 480(%rsp), %r11
          /* r9 = ca AND cb; r13 = -ca, r14 = -cb (all-ones or zero masks) */
          andq %r14, %r9
          negq %r13
          negq %r14
          addq $0xc0, %r15
          /* r[24..35] = (sa AND -cb) + (sb AND -ca), carry out added to r9 */
          movq (%r10), %rax
          movq (%r11), %rcx
          pextq %r14, %rax, %rax
          pextq %r13, %rcx, %rcx
          addq %rcx, %rax
          movq 8(%r10), %rcx
          movq 8(%r11), %r8
          pextq %r14, %rcx, %rcx
          pextq %r13, %r8, %r8
          movq %rax, (%r15)
          adcq %r8, %rcx
          movq 16(%r10), %r8
          movq 16(%r11), %rax
          pextq %r14, %r8, %r8
          pextq %r13, %rax, %rax
          movq %rcx, 8(%r15)
          adcq %rax, %r8
          movq 24(%r10), %rax
          movq 24(%r11), %rcx
          pextq %r14, %rax, %rax
          pextq %r13, %rcx, %rcx
          movq %r8, 16(%r15)
          adcq %rcx, %rax
          movq 32(%r10), %rcx
          movq 32(%r11), %r8
          pextq %r14, %rcx, %rcx
          pextq %r13, %r8, %r8
          movq %rax, 24(%r15)
          adcq %r8, %rcx
          movq 40(%r10), %r8
          movq 40(%r11), %rax
          pextq %r14, %r8, %r8
          pextq %r13, %rax, %rax
          movq %rcx, 32(%r15)
          adcq %rax, %r8
          movq 48(%r10), %rax
          movq 48(%r11), %rcx
          pextq %r14, %rax, %rax
          pextq %r13, %rcx, %rcx
          movq %r8, 40(%r15)
          adcq %rcx, %rax
          movq 56(%r10), %rcx
          movq 56(%r11), %r8
          pextq %r14, %rcx, %rcx
          pextq %r13, %r8, %r8
          movq %rax, 48(%r15)
          adcq %r8, %rcx
          movq 64(%r10), %r8
          movq 64(%r11), %rax
          pextq %r14, %r8, %r8
          pextq %r13, %rax, %rax
          movq %rcx, 56(%r15)
          adcq %rax, %r8
          movq 72(%r10), %rax
          movq 72(%r11), %rcx
          pextq %r14, %rax, %rax
          pextq %r13, %rcx, %rcx
          movq %r8, 64(%r15)
          adcq %rcx, %rax
          movq 80(%r10), %rcx
          movq 80(%r11), %r8
          pextq %r14, %rcx, %rcx
          pextq %r13, %r8, %r8
          movq %rax, 72(%r15)
          adcq %r8, %rcx
          movq 88(%r10), %r8
          movq 88(%r11), %rax
          pextq %r14, %r8, %r8
          pextq %r13, %rax, %rax
          movq %rcx, 80(%r15)
          adcq %rax, %r8
          movq %r8, 88(%r15)
          adcq $0x00, %r9
          /* r11 = &a1*b1, r10 = &sa*sb */
          leaq 192(%rsp), %r11
          movq %rsp, %r10
          /* frame[0..191] -= a1*b1, borrow taken from r9 */
          movq (%r10), %rax
          subq (%r11), %rax
          movq 8(%r10), %rcx
          movq %rax, (%r10)
          sbbq 8(%r11), %rcx
          movq 16(%r10), %r8
          movq %rcx, 8(%r10)
          sbbq 16(%r11), %r8
          movq 24(%r10), %rax
          movq %r8, 16(%r10)
          sbbq 24(%r11), %rax
          movq 32(%r10), %rcx
          movq %rax, 24(%r10)
          sbbq 32(%r11), %rcx
          movq 40(%r10), %r8
          movq %rcx, 32(%r10)
          sbbq 40(%r11), %r8
          movq 48(%r10), %rax
          movq %r8, 40(%r10)
          sbbq 48(%r11), %rax
          movq 56(%r10), %rcx
          movq %rax, 48(%r10)
          sbbq 56(%r11), %rcx
          movq 64(%r10), %r8
          movq %rcx, 56(%r10)
          sbbq 64(%r11), %r8
          movq 72(%r10), %rax
          movq %r8, 64(%r10)
          sbbq 72(%r11), %rax
          movq 80(%r10), %rcx
          movq %rax, 72(%r10)
          sbbq 80(%r11), %rcx
          movq 88(%r10), %r8
          movq %rcx, 80(%r10)
          sbbq 88(%r11), %r8
          movq 96(%r10), %rax
          movq %r8, 88(%r10)
          sbbq 96(%r11), %rax
          movq 104(%r10), %rcx
          movq %rax, 96(%r10)
          sbbq 104(%r11), %rcx
          movq 112(%r10), %r8
          movq %rcx, 104(%r10)
          sbbq 112(%r11), %r8
          movq 120(%r10), %rax
          movq %r8, 112(%r10)
          sbbq 120(%r11), %rax
          movq 128(%r10), %rcx
          movq %rax, 120(%r10)
          sbbq 128(%r11), %rcx
          movq 136(%r10), %r8
          movq %rcx, 128(%r10)
          sbbq 136(%r11), %r8
          movq 144(%r10), %rax
          movq %r8, 136(%r10)
          sbbq 144(%r11), %rax
          movq 152(%r10), %rcx
          movq %rax, 144(%r10)
          sbbq 152(%r11), %rcx
          movq 160(%r10), %r8
          movq %rcx, 152(%r10)
          sbbq 160(%r11), %r8
          movq 168(%r10), %rax
          movq %r8, 160(%r10)
          sbbq 168(%r11), %rax
          movq 176(%r10), %rcx
          movq %rax, 168(%r10)
          sbbq 176(%r11), %rcx
          movq 184(%r10), %r8
          movq %rcx, 176(%r10)
          sbbq 184(%r11), %r8
          movq %r8, 184(%r10)
          sbbq $0x00, %r9
          /* frame[0..191] -= a0*b0 (read from r[0..23]), borrow from r9 */
          movq (%r10), %rax
          subq (%rdi), %rax
          movq 8(%r10), %rcx
          movq %rax, (%r10)
          sbbq 8(%rdi), %rcx
          movq 16(%r10), %r8
          movq %rcx, 8(%r10)
          sbbq 16(%rdi), %r8
          movq 24(%r10), %rax
          movq %r8, 16(%r10)
          sbbq 24(%rdi), %rax
          movq 32(%r10), %rcx
          movq %rax, 24(%r10)
          sbbq 32(%rdi), %rcx
          movq 40(%r10), %r8
          movq %rcx, 32(%r10)
          sbbq 40(%rdi), %r8
          movq 48(%r10), %rax
          movq %r8, 40(%r10)
          sbbq 48(%rdi), %rax
          movq 56(%r10), %rcx
          movq %rax, 48(%r10)
          sbbq 56(%rdi), %rcx
          movq 64(%r10), %r8
          movq %rcx, 56(%r10)
          sbbq 64(%rdi), %r8
          movq 72(%r10), %rax
          movq %r8, 64(%r10)
          sbbq 72(%rdi), %rax
          movq 80(%r10), %rcx
          movq %rax, 72(%r10)
          sbbq 80(%rdi), %rcx
          movq 88(%r10), %r8
          movq %rcx, 80(%r10)
          sbbq 88(%rdi), %r8
          movq 96(%r10), %rax
          movq %r8, 88(%r10)
          sbbq 96(%rdi), %rax
          movq 104(%r10), %rcx
          movq %rax, 96(%r10)
          sbbq 104(%rdi), %rcx
          movq 112(%r10), %r8
          movq %rcx, 104(%r10)
          sbbq 112(%rdi), %r8
          movq 120(%r10), %rax
          movq %r8, 112(%r10)
          sbbq 120(%rdi), %rax
          movq 128(%r10), %rcx
          movq %rax, 120(%r10)
          sbbq 128(%rdi), %rcx
          movq 136(%r10), %r8
          movq %rcx, 128(%r10)
          sbbq 136(%rdi), %r8
          movq 144(%r10), %rax
          movq %r8, 136(%r10)
          sbbq 144(%rdi), %rax
          movq 152(%r10), %rcx
          movq %rax, 144(%r10)
          sbbq 152(%rdi), %rcx
          movq 160(%r10), %r8
          movq %rcx, 152(%r10)
          sbbq 160(%rdi), %r8
          movq 168(%r10), %rax
          movq %r8, 160(%r10)
          sbbq 168(%rdi), %rax
          movq 176(%r10), %rcx
          movq %rax, 168(%r10)
          sbbq 176(%rdi), %rcx
          movq 184(%r10), %r8
          movq %rcx, 176(%r10)
          sbbq 184(%rdi), %r8
          movq %r8, 184(%r10)
          sbbq $0x00, %r9
          /* r15 = &r[12]: the middle term is added 96 bytes into r */
          subq $0x60, %r15
          # Add
          movq (%r15), %rax
          addq (%r10), %rax
          movq 8(%r15), %rcx
          movq %rax, (%r15)
          adcq 8(%r10), %rcx
          movq 16(%r15), %r8
          movq %rcx, 8(%r15)
          adcq 16(%r10), %r8
          movq 24(%r15), %rax
          movq %r8, 16(%r15)
          adcq 24(%r10), %rax
          movq 32(%r15), %rcx
          movq %rax, 24(%r15)
          adcq 32(%r10), %rcx
          movq 40(%r15), %r8
          movq %rcx, 32(%r15)
          adcq 40(%r10), %r8
          movq 48(%r15), %rax
          movq %r8, 40(%r15)
          adcq 48(%r10), %rax
          movq 56(%r15), %rcx
          movq %rax, 48(%r15)
          adcq 56(%r10), %rcx
          movq 64(%r15), %r8
          movq %rcx, 56(%r15)
          adcq 64(%r10), %r8
          movq 72(%r15), %rax
          movq %r8, 64(%r15)
          adcq 72(%r10), %rax
          movq 80(%r15), %rcx
          movq %rax, 72(%r15)
          adcq 80(%r10), %rcx
          movq 88(%r15), %r8
          movq %rcx, 80(%r15)
          adcq 88(%r10), %r8
          movq 96(%r15), %rax
          movq %r8, 88(%r15)
          adcq 96(%r10), %rax
          movq 104(%r15), %rcx
          movq %rax, 96(%r15)
          adcq 104(%r10), %rcx
          movq 112(%r15), %r8
          movq %rcx, 104(%r15)
          adcq 112(%r10), %r8
          movq 120(%r15), %rax
          movq %r8, 112(%r15)
          adcq 120(%r10), %rax
          movq 128(%r15), %rcx
          movq %rax, 120(%r15)
          adcq 128(%r10), %rcx
          movq 136(%r15), %r8
          movq %rcx, 128(%r15)
          adcq 136(%r10), %r8
          movq 144(%r15), %rax
          movq %r8, 136(%r15)
          adcq 144(%r10), %rax
          movq 152(%r15), %rcx
          movq %rax, 144(%r15)
          adcq 152(%r10), %rcx
          movq 160(%r15), %r8
          movq %rcx, 152(%r15)
          adcq 160(%r10), %r8
          movq 168(%r15), %rax
          movq %r8, 160(%r15)
          adcq 168(%r10), %rax
          movq 176(%r15), %rcx
          movq %rax, 168(%r15)
          adcq 176(%r10), %rcx
          movq 184(%r15), %r8
          movq %rcx, 176(%r15)
          adcq 184(%r10), %r8
          movq %r8, 184(%r15)
          adcq $0x00, %r9
          /* r[36] = accumulated top word of the middle term */
          movq %r9, 288(%rdi)
          /* r15 = &r[24] */
          addq $0x60, %r15
          # Add
          /* r[24..36] += low 13 words of a1*b1 */
          movq (%r15), %rax
          addq (%r11), %rax
          movq 8(%r15), %rcx
          movq %rax, (%r15)
          adcq 8(%r11), %rcx
          movq 16(%r15), %r8
          movq %rcx, 8(%r15)
          adcq 16(%r11), %r8
          movq 24(%r15), %rax
          movq %r8, 16(%r15)
          adcq 24(%r11), %rax
          movq 32(%r15), %rcx
          movq %rax, 24(%r15)
          adcq 32(%r11), %rcx
          movq 40(%r15), %r8
          movq %rcx, 32(%r15)
          adcq 40(%r11), %r8
          movq 48(%r15), %rax
          movq %r8, 40(%r15)
          adcq 48(%r11), %rax
          movq 56(%r15), %rcx
          movq %rax, 48(%r15)
          adcq 56(%r11), %rcx
          movq 64(%r15), %r8
          movq %rcx, 56(%r15)
          adcq 64(%r11), %r8
          movq 72(%r15), %rax
          movq %r8, 64(%r15)
          adcq 72(%r11), %rax
          movq 80(%r15), %rcx
          movq %rax, 72(%r15)
          adcq 80(%r11), %rcx
          movq 88(%r15), %r8
          movq %rcx, 80(%r15)
          adcq 88(%r11), %r8
          movq 96(%r15), %rax
          movq %r8, 88(%r15)
          adcq 96(%r11), %rax
          movq %rax, 96(%r15)
          # Add to zero
          /* r[37..47] = remaining words of a1*b1 plus the running carry */
          movq 104(%r11), %rax
          adcq $0x00, %rax
          movq 112(%r11), %rcx
          movq %rax, 104(%r15)
          adcq $0x00, %rcx
          movq 120(%r11), %r8
          movq %rcx, 112(%r15)
          adcq $0x00, %r8
          movq 128(%r11), %rax
          movq %r8, 120(%r15)
          adcq $0x00, %rax
          movq 136(%r11), %rcx
          movq %rax, 128(%r15)
          adcq $0x00, %rcx
          movq 144(%r11), %r8
          movq %rcx, 136(%r15)
          adcq $0x00, %r8
          movq 152(%r11), %rax
          movq %r8, 144(%r15)
          adcq $0x00, %rax
          movq 160(%r11), %rcx
          movq %rax, 152(%r15)
          adcq $0x00, %rcx
          movq 168(%r11), %r8
          movq %rcx, 160(%r15)
          adcq $0x00, %r8
          movq 176(%r11), %rax
          movq %r8, 168(%r15)
          adcq $0x00, %rax
          movq 184(%r11), %rcx
          movq %rax, 176(%r15)
          adcq $0x00, %rcx
          movq %rcx, 184(%r15)
          addq $0x268, %rsp
          popq %r15
          popq %r14
          popq %r13
          popq %r12
          repz retq
  #ifndef __APPLE__
  .size sp_3072_mul_avx2_24,.-sp_3072_mul_avx2_24
  #endif /* __APPLE__ */
  18145. /* Square a and put result in r. (r = a * a)
  18146. *
  18147. * r A single precision integer.
  18148. * a A single precision integer.
  18149. */
  18150. #ifndef __APPLE__
  18151. .text
  18152. .globl sp_3072_sqr_avx2_24
  18153. .type sp_3072_sqr_avx2_24,@function
  18154. .align 16
  18155. sp_3072_sqr_avx2_24:
  18156. #else
  18157. .section __TEXT,__text
  18158. .globl _sp_3072_sqr_avx2_24
  18159. .p2align 4
  18160. _sp_3072_sqr_avx2_24:
  18161. #endif /* __APPLE__ */
  18162. subq $0x1f8, %rsp
  18163. movq %rdi, 480(%rsp)
  18164. movq %rsi, 488(%rsp)
  18165. leaq 384(%rsp), %r8
  18166. leaq 96(%rsi), %r9
  18167. # Add
  18168. movq (%rsi), %rdx
  18169. xorq %rcx, %rcx
  18170. addq (%r9), %rdx
  18171. movq 8(%rsi), %rax
  18172. movq %rdx, (%r8)
  18173. adcq 8(%r9), %rax
  18174. movq 16(%rsi), %rdx
  18175. movq %rax, 8(%r8)
  18176. adcq 16(%r9), %rdx
  18177. movq 24(%rsi), %rax
  18178. movq %rdx, 16(%r8)
  18179. adcq 24(%r9), %rax
  18180. movq 32(%rsi), %rdx
  18181. movq %rax, 24(%r8)
  18182. adcq 32(%r9), %rdx
  18183. movq 40(%rsi), %rax
  18184. movq %rdx, 32(%r8)
  18185. adcq 40(%r9), %rax
  18186. movq 48(%rsi), %rdx
  18187. movq %rax, 40(%r8)
  18188. adcq 48(%r9), %rdx
  18189. movq 56(%rsi), %rax
  18190. movq %rdx, 48(%r8)
  18191. adcq 56(%r9), %rax
  18192. movq 64(%rsi), %rdx
  18193. movq %rax, 56(%r8)
  18194. adcq 64(%r9), %rdx
  18195. movq 72(%rsi), %rax
  18196. movq %rdx, 64(%r8)
  18197. adcq 72(%r9), %rax
  18198. movq 80(%rsi), %rdx
  18199. movq %rax, 72(%r8)
  18200. adcq 80(%r9), %rdx
  18201. movq 88(%rsi), %rax
  18202. movq %rdx, 80(%r8)
  18203. adcq 88(%r9), %rax
  18204. movq %rax, 88(%r8)
  18205. adcq $0x00, %rcx
  18206. movq %rcx, 496(%rsp)
  18207. movq %r8, %rsi
  18208. movq %rsp, %rdi
  18209. #ifndef __APPLE__
  18210. callq sp_3072_sqr_avx2_12@plt
  18211. #else
  18212. callq _sp_3072_sqr_avx2_12
  18213. #endif /* __APPLE__ */
  18214. movq 488(%rsp), %rsi
  18215. leaq 192(%rsp), %rdi
  18216. addq $0x60, %rsi
  18217. #ifndef __APPLE__
  18218. callq sp_3072_sqr_avx2_12@plt
  18219. #else
  18220. callq _sp_3072_sqr_avx2_12
  18221. #endif /* __APPLE__ */
  18222. movq 488(%rsp), %rsi
  18223. movq 480(%rsp), %rdi
  18224. #ifndef __APPLE__
  18225. callq sp_3072_sqr_avx2_12@plt
  18226. #else
  18227. callq _sp_3072_sqr_avx2_12
  18228. #endif /* __APPLE__ */
  18229. movq 496(%rsp), %r10
  18230. movq %rdi, %r9
  18231. leaq 384(%rsp), %r8
  18232. movq %r10, %rcx
  18233. negq %r10
  18234. addq $0xc0, %r9
  18235. movq (%r8), %rdx
  18236. pextq %r10, %rdx, %rdx
  18237. addq %rdx, %rdx
  18238. movq 8(%r8), %rax
  18239. movq %rdx, (%r9)
  18240. pextq %r10, %rax, %rax
  18241. adcq %rax, %rax
  18242. movq 16(%r8), %rdx
  18243. movq %rax, 8(%r9)
  18244. pextq %r10, %rdx, %rdx
  18245. adcq %rdx, %rdx
  18246. movq 24(%r8), %rax
  18247. movq %rdx, 16(%r9)
  18248. pextq %r10, %rax, %rax
  18249. adcq %rax, %rax
  18250. movq 32(%r8), %rdx
  18251. movq %rax, 24(%r9)
  18252. pextq %r10, %rdx, %rdx
  18253. adcq %rdx, %rdx
  18254. movq 40(%r8), %rax
  18255. movq %rdx, 32(%r9)
  18256. pextq %r10, %rax, %rax
  18257. adcq %rax, %rax
  18258. movq 48(%r8), %rdx
  18259. movq %rax, 40(%r9)
  18260. pextq %r10, %rdx, %rdx
  18261. adcq %rdx, %rdx
  18262. movq 56(%r8), %rax
  18263. movq %rdx, 48(%r9)
  18264. pextq %r10, %rax, %rax
  18265. adcq %rax, %rax
  18266. movq 64(%r8), %rdx
  18267. movq %rax, 56(%r9)
  18268. pextq %r10, %rdx, %rdx
  18269. adcq %rdx, %rdx
  18270. movq 72(%r8), %rax
  18271. movq %rdx, 64(%r9)
  18272. pextq %r10, %rax, %rax
  18273. adcq %rax, %rax
  18274. movq 80(%r8), %rdx
  18275. movq %rax, 72(%r9)
  18276. pextq %r10, %rdx, %rdx
  18277. adcq %rdx, %rdx
  18278. movq 88(%r8), %rax
  18279. movq %rdx, 80(%r9)
  18280. pextq %r10, %rax, %rax
  18281. adcq %rax, %rax
  18282. movq %rax, 88(%r9)
  18283. adcq $0x00, %rcx
  18284. leaq 192(%rsp), %rsi
  18285. movq %rsp, %r8
  18286. movq (%r8), %rdx
  18287. subq (%rsi), %rdx
  18288. movq 8(%r8), %rax
  18289. movq %rdx, (%r8)
  18290. sbbq 8(%rsi), %rax
  18291. movq 16(%r8), %rdx
  18292. movq %rax, 8(%r8)
  18293. sbbq 16(%rsi), %rdx
  18294. movq 24(%r8), %rax
  18295. movq %rdx, 16(%r8)
  18296. sbbq 24(%rsi), %rax
  18297. movq 32(%r8), %rdx
  18298. movq %rax, 24(%r8)
  18299. sbbq 32(%rsi), %rdx
  18300. movq 40(%r8), %rax
  18301. movq %rdx, 32(%r8)
  18302. sbbq 40(%rsi), %rax
  18303. movq 48(%r8), %rdx
  18304. movq %rax, 40(%r8)
  18305. sbbq 48(%rsi), %rdx
  18306. movq 56(%r8), %rax
  18307. movq %rdx, 48(%r8)
  18308. sbbq 56(%rsi), %rax
  18309. movq 64(%r8), %rdx
  18310. movq %rax, 56(%r8)
  18311. sbbq 64(%rsi), %rdx
  18312. movq 72(%r8), %rax
  18313. movq %rdx, 64(%r8)
  18314. sbbq 72(%rsi), %rax
  18315. movq 80(%r8), %rdx
  18316. movq %rax, 72(%r8)
  18317. sbbq 80(%rsi), %rdx
  18318. movq 88(%r8), %rax
  18319. movq %rdx, 80(%r8)
  18320. sbbq 88(%rsi), %rax
  18321. movq 96(%r8), %rdx
  18322. movq %rax, 88(%r8)
  18323. sbbq 96(%rsi), %rdx
  18324. movq 104(%r8), %rax
  18325. movq %rdx, 96(%r8)
  18326. sbbq 104(%rsi), %rax
  18327. movq 112(%r8), %rdx
  18328. movq %rax, 104(%r8)
  18329. sbbq 112(%rsi), %rdx
  18330. movq 120(%r8), %rax
  18331. movq %rdx, 112(%r8)
  18332. sbbq 120(%rsi), %rax
  18333. movq 128(%r8), %rdx
  18334. movq %rax, 120(%r8)
  18335. sbbq 128(%rsi), %rdx
  18336. movq 136(%r8), %rax
  18337. movq %rdx, 128(%r8)
  18338. sbbq 136(%rsi), %rax
  18339. movq 144(%r8), %rdx
  18340. movq %rax, 136(%r8)
  18341. sbbq 144(%rsi), %rdx
  18342. movq 152(%r8), %rax
  18343. movq %rdx, 144(%r8)
  18344. sbbq 152(%rsi), %rax
  18345. movq 160(%r8), %rdx
  18346. movq %rax, 152(%r8)
  18347. sbbq 160(%rsi), %rdx
  18348. movq 168(%r8), %rax
  18349. movq %rdx, 160(%r8)
  18350. sbbq 168(%rsi), %rax
  18351. movq 176(%r8), %rdx
  18352. movq %rax, 168(%r8)
  18353. sbbq 176(%rsi), %rdx
  18354. movq 184(%r8), %rax
  18355. movq %rdx, 176(%r8)
  18356. sbbq 184(%rsi), %rax
  18357. movq %rax, 184(%r8)
  18358. sbbq $0x00, %rcx
  18359. movq (%r8), %rdx
  18360. subq (%rdi), %rdx
  18361. movq 8(%r8), %rax
  18362. movq %rdx, (%r8)
  18363. sbbq 8(%rdi), %rax
  18364. movq 16(%r8), %rdx
  18365. movq %rax, 8(%r8)
  18366. sbbq 16(%rdi), %rdx
  18367. movq 24(%r8), %rax
  18368. movq %rdx, 16(%r8)
  18369. sbbq 24(%rdi), %rax
  18370. movq 32(%r8), %rdx
  18371. movq %rax, 24(%r8)
  18372. sbbq 32(%rdi), %rdx
  18373. movq 40(%r8), %rax
  18374. movq %rdx, 32(%r8)
  18375. sbbq 40(%rdi), %rax
  18376. movq 48(%r8), %rdx
  18377. movq %rax, 40(%r8)
  18378. sbbq 48(%rdi), %rdx
  18379. movq 56(%r8), %rax
  18380. movq %rdx, 48(%r8)
  18381. sbbq 56(%rdi), %rax
  18382. movq 64(%r8), %rdx
  18383. movq %rax, 56(%r8)
  18384. sbbq 64(%rdi), %rdx
  18385. movq 72(%r8), %rax
  18386. movq %rdx, 64(%r8)
  18387. sbbq 72(%rdi), %rax
  18388. movq 80(%r8), %rdx
  18389. movq %rax, 72(%r8)
  18390. sbbq 80(%rdi), %rdx
  18391. movq 88(%r8), %rax
  18392. movq %rdx, 80(%r8)
  18393. sbbq 88(%rdi), %rax
  18394. movq 96(%r8), %rdx
  18395. movq %rax, 88(%r8)
  18396. sbbq 96(%rdi), %rdx
  18397. movq 104(%r8), %rax
  18398. movq %rdx, 96(%r8)
  18399. sbbq 104(%rdi), %rax
  18400. movq 112(%r8), %rdx
  18401. movq %rax, 104(%r8)
  18402. sbbq 112(%rdi), %rdx
  18403. movq 120(%r8), %rax
  18404. movq %rdx, 112(%r8)
  18405. sbbq 120(%rdi), %rax
  18406. movq 128(%r8), %rdx
  18407. movq %rax, 120(%r8)
  18408. sbbq 128(%rdi), %rdx
  18409. movq 136(%r8), %rax
  18410. movq %rdx, 128(%r8)
  18411. sbbq 136(%rdi), %rax
  18412. movq 144(%r8), %rdx
  18413. movq %rax, 136(%r8)
  18414. sbbq 144(%rdi), %rdx
  18415. movq 152(%r8), %rax
  18416. movq %rdx, 144(%r8)
  18417. sbbq 152(%rdi), %rax
  18418. movq 160(%r8), %rdx
  18419. movq %rax, 152(%r8)
  18420. sbbq 160(%rdi), %rdx
  18421. movq 168(%r8), %rax
  18422. movq %rdx, 160(%r8)
  18423. sbbq 168(%rdi), %rax
  18424. movq 176(%r8), %rdx
  18425. movq %rax, 168(%r8)
  18426. sbbq 176(%rdi), %rdx
  18427. movq 184(%r8), %rax
  18428. movq %rdx, 176(%r8)
  18429. sbbq 184(%rdi), %rax
  18430. movq %rax, 184(%r8)
  18431. sbbq $0x00, %rcx
  18432. subq $0x60, %r9
  18433. # Add in place
  18434. movq (%r9), %rdx
  18435. addq (%r8), %rdx
  18436. movq 8(%r9), %rax
  18437. movq %rdx, (%r9)
  18438. adcq 8(%r8), %rax
  18439. movq 16(%r9), %rdx
  18440. movq %rax, 8(%r9)
  18441. adcq 16(%r8), %rdx
  18442. movq 24(%r9), %rax
  18443. movq %rdx, 16(%r9)
  18444. adcq 24(%r8), %rax
  18445. movq 32(%r9), %rdx
  18446. movq %rax, 24(%r9)
  18447. adcq 32(%r8), %rdx
  18448. movq 40(%r9), %rax
  18449. movq %rdx, 32(%r9)
  18450. adcq 40(%r8), %rax
  18451. movq 48(%r9), %rdx
  18452. movq %rax, 40(%r9)
  18453. adcq 48(%r8), %rdx
  18454. movq 56(%r9), %rax
  18455. movq %rdx, 48(%r9)
  18456. adcq 56(%r8), %rax
  18457. movq 64(%r9), %rdx
  18458. movq %rax, 56(%r9)
  18459. adcq 64(%r8), %rdx
  18460. movq 72(%r9), %rax
  18461. movq %rdx, 64(%r9)
  18462. adcq 72(%r8), %rax
  18463. movq 80(%r9), %rdx
  18464. movq %rax, 72(%r9)
  18465. adcq 80(%r8), %rdx
  18466. movq 88(%r9), %rax
  18467. movq %rdx, 80(%r9)
  18468. adcq 88(%r8), %rax
  18469. movq 96(%r9), %rdx
  18470. movq %rax, 88(%r9)
  18471. adcq 96(%r8), %rdx
  18472. movq 104(%r9), %rax
  18473. movq %rdx, 96(%r9)
  18474. adcq 104(%r8), %rax
  18475. movq 112(%r9), %rdx
  18476. movq %rax, 104(%r9)
  18477. adcq 112(%r8), %rdx
  18478. movq 120(%r9), %rax
  18479. movq %rdx, 112(%r9)
  18480. adcq 120(%r8), %rax
  18481. movq 128(%r9), %rdx
  18482. movq %rax, 120(%r9)
  18483. adcq 128(%r8), %rdx
  18484. movq 136(%r9), %rax
  18485. movq %rdx, 128(%r9)
  18486. adcq 136(%r8), %rax
  18487. movq 144(%r9), %rdx
  18488. movq %rax, 136(%r9)
  18489. adcq 144(%r8), %rdx
  18490. movq 152(%r9), %rax
  18491. movq %rdx, 144(%r9)
  18492. adcq 152(%r8), %rax
  18493. movq 160(%r9), %rdx
  18494. movq %rax, 152(%r9)
  18495. adcq 160(%r8), %rdx
  18496. movq 168(%r9), %rax
  18497. movq %rdx, 160(%r9)
  18498. adcq 168(%r8), %rax
  18499. movq 176(%r9), %rdx
  18500. movq %rax, 168(%r9)
  18501. adcq 176(%r8), %rdx
  18502. movq 184(%r9), %rax
  18503. movq %rdx, 176(%r9)
  18504. adcq 184(%r8), %rax
  18505. movq %rax, 184(%r9)
  18506. adcq $0x00, %rcx
  18507. movq %rcx, 288(%rdi)
  18508. # Add in place
  18509. movq 96(%r9), %rdx
  18510. addq (%rsi), %rdx
  18511. movq 104(%r9), %rax
  18512. movq %rdx, 96(%r9)
  18513. adcq 8(%rsi), %rax
  18514. movq 112(%r9), %rdx
  18515. movq %rax, 104(%r9)
  18516. adcq 16(%rsi), %rdx
  18517. movq 120(%r9), %rax
  18518. movq %rdx, 112(%r9)
  18519. adcq 24(%rsi), %rax
  18520. movq 128(%r9), %rdx
  18521. movq %rax, 120(%r9)
  18522. adcq 32(%rsi), %rdx
  18523. movq 136(%r9), %rax
  18524. movq %rdx, 128(%r9)
  18525. adcq 40(%rsi), %rax
  18526. movq 144(%r9), %rdx
  18527. movq %rax, 136(%r9)
  18528. adcq 48(%rsi), %rdx
  18529. movq 152(%r9), %rax
  18530. movq %rdx, 144(%r9)
  18531. adcq 56(%rsi), %rax
  18532. movq 160(%r9), %rdx
  18533. movq %rax, 152(%r9)
  18534. adcq 64(%rsi), %rdx
  18535. movq 168(%r9), %rax
  18536. movq %rdx, 160(%r9)
  18537. adcq 72(%rsi), %rax
  18538. movq 176(%r9), %rdx
  18539. movq %rax, 168(%r9)
  18540. adcq 80(%rsi), %rdx
  18541. movq 184(%r9), %rax
  18542. movq %rdx, 176(%r9)
  18543. adcq 88(%rsi), %rax
  18544. movq 192(%r9), %rdx
  18545. movq %rax, 184(%r9)
  18546. adcq 96(%rsi), %rdx
  18547. movq %rdx, 192(%r9)
  18548. # Add to zero
  18549. movq 104(%rsi), %rdx
  18550. adcq $0x00, %rdx
  18551. movq 112(%rsi), %rax
  18552. movq %rdx, 200(%r9)
  18553. adcq $0x00, %rax
  18554. movq 120(%rsi), %rdx
  18555. movq %rax, 208(%r9)
  18556. adcq $0x00, %rdx
  18557. movq 128(%rsi), %rax
  18558. movq %rdx, 216(%r9)
  18559. adcq $0x00, %rax
  18560. movq 136(%rsi), %rdx
  18561. movq %rax, 224(%r9)
  18562. adcq $0x00, %rdx
  18563. movq 144(%rsi), %rax
  18564. movq %rdx, 232(%r9)
  18565. adcq $0x00, %rax
  18566. movq 152(%rsi), %rdx
  18567. movq %rax, 240(%r9)
  18568. adcq $0x00, %rdx
  18569. movq 160(%rsi), %rax
  18570. movq %rdx, 248(%r9)
  18571. adcq $0x00, %rax
  18572. movq 168(%rsi), %rdx
  18573. movq %rax, 256(%r9)
  18574. adcq $0x00, %rdx
  18575. movq 176(%rsi), %rax
  18576. movq %rdx, 264(%r9)
  18577. adcq $0x00, %rax
  18578. movq 184(%rsi), %rdx
  18579. movq %rax, 272(%r9)
  18580. adcq $0x00, %rdx
  18581. movq %rdx, 280(%r9)
  18582. addq $0x1f8, %rsp
  18583. repz retq
  18584. #ifndef __APPLE__
  18585. .size sp_3072_sqr_avx2_24,.-sp_3072_sqr_avx2_24
  18586. #endif /* __APPLE__ */
  18587. /* Sub b from a into a. (a -= b)
  18588. *
  18589. * a A single precision integer and result.
  18590. * b A single precision integer.
  *
  * Both operands are read as 48 words of 64 bits (byte offsets 0..376). The
  * borrow chain starts at offset 0 and runs upward, so offset 0 is the least
  * significant word. a is addressed through %rdi and b through %rsi; each
  * word of the difference is stored back over a.
  *
  * On return %rax holds 0 when no borrow came out of the top word and all
  * ones (-1) when one did. %rcx, %rdx and the flags are overwritten; no
  * stack is used.
  *
  * The symbol is emitted as sp_3072_sub_in_place_48 (with .type/.size)
  * unless __APPLE__ is defined, in which case it is
  * _sp_3072_sub_in_place_48 placed in __TEXT,__text.
  18591. */
  18592. #ifndef __APPLE__
  18593. .text
  18594. .globl sp_3072_sub_in_place_48
  18595. .type sp_3072_sub_in_place_48,@function
  18596. .align 16
  18597. sp_3072_sub_in_place_48:
  18598. #else
  18599. .section __TEXT,__text
  18600. .globl _sp_3072_sub_in_place_48
  18601. .p2align 4
  18602. _sp_3072_sub_in_place_48:
  18603. #endif /* __APPLE__ */
  # Word 0: a plain subq starts the borrow chain. The xorq that clears %rax
  # sits before the subq, so the flags it writes are replaced before any
  # sbbq reads them.
  18604. movq (%rdi), %rdx
  18605. xorq %rax, %rax
  18606. subq (%rsi), %rdx
  # Words 1..47: load the next word of a, store the previous result, then
  # sbbq the matching word of b. %rdx and %rcx alternate as the working
  # register. The movq loads and stores do not modify flags, so the borrow
  # is carried intact from one sbbq to the next.
  18607. movq 8(%rdi), %rcx
  18608. movq %rdx, (%rdi)
  18609. sbbq 8(%rsi), %rcx
  18610. movq 16(%rdi), %rdx
  18611. movq %rcx, 8(%rdi)
  18612. sbbq 16(%rsi), %rdx
  18613. movq 24(%rdi), %rcx
  18614. movq %rdx, 16(%rdi)
  18615. sbbq 24(%rsi), %rcx
  18616. movq 32(%rdi), %rdx
  18617. movq %rcx, 24(%rdi)
  18618. sbbq 32(%rsi), %rdx
  18619. movq 40(%rdi), %rcx
  18620. movq %rdx, 32(%rdi)
  18621. sbbq 40(%rsi), %rcx
  18622. movq 48(%rdi), %rdx
  18623. movq %rcx, 40(%rdi)
  18624. sbbq 48(%rsi), %rdx
  18625. movq 56(%rdi), %rcx
  18626. movq %rdx, 48(%rdi)
  18627. sbbq 56(%rsi), %rcx
  18628. movq 64(%rdi), %rdx
  18629. movq %rcx, 56(%rdi)
  18630. sbbq 64(%rsi), %rdx
  18631. movq 72(%rdi), %rcx
  18632. movq %rdx, 64(%rdi)
  18633. sbbq 72(%rsi), %rcx
  18634. movq 80(%rdi), %rdx
  18635. movq %rcx, 72(%rdi)
  18636. sbbq 80(%rsi), %rdx
  18637. movq 88(%rdi), %rcx
  18638. movq %rdx, 80(%rdi)
  18639. sbbq 88(%rsi), %rcx
  18640. movq 96(%rdi), %rdx
  18641. movq %rcx, 88(%rdi)
  18642. sbbq 96(%rsi), %rdx
  18643. movq 104(%rdi), %rcx
  18644. movq %rdx, 96(%rdi)
  18645. sbbq 104(%rsi), %rcx
  18646. movq 112(%rdi), %rdx
  18647. movq %rcx, 104(%rdi)
  18648. sbbq 112(%rsi), %rdx
  18649. movq 120(%rdi), %rcx
  18650. movq %rdx, 112(%rdi)
  18651. sbbq 120(%rsi), %rcx
  18652. movq 128(%rdi), %rdx
  18653. movq %rcx, 120(%rdi)
  18654. sbbq 128(%rsi), %rdx
  18655. movq 136(%rdi), %rcx
  18656. movq %rdx, 128(%rdi)
  18657. sbbq 136(%rsi), %rcx
  18658. movq 144(%rdi), %rdx
  18659. movq %rcx, 136(%rdi)
  18660. sbbq 144(%rsi), %rdx
  18661. movq 152(%rdi), %rcx
  18662. movq %rdx, 144(%rdi)
  18663. sbbq 152(%rsi), %rcx
  18664. movq 160(%rdi), %rdx
  18665. movq %rcx, 152(%rdi)
  18666. sbbq 160(%rsi), %rdx
  18667. movq 168(%rdi), %rcx
  18668. movq %rdx, 160(%rdi)
  18669. sbbq 168(%rsi), %rcx
  18670. movq 176(%rdi), %rdx
  18671. movq %rcx, 168(%rdi)
  18672. sbbq 176(%rsi), %rdx
  18673. movq 184(%rdi), %rcx
  18674. movq %rdx, 176(%rdi)
  18675. sbbq 184(%rsi), %rcx
  18676. movq 192(%rdi), %rdx
  18677. movq %rcx, 184(%rdi)
  18678. sbbq 192(%rsi), %rdx
  18679. movq 200(%rdi), %rcx
  18680. movq %rdx, 192(%rdi)
  18681. sbbq 200(%rsi), %rcx
  18682. movq 208(%rdi), %rdx
  18683. movq %rcx, 200(%rdi)
  18684. sbbq 208(%rsi), %rdx
  18685. movq 216(%rdi), %rcx
  18686. movq %rdx, 208(%rdi)
  18687. sbbq 216(%rsi), %rcx
  18688. movq 224(%rdi), %rdx
  18689. movq %rcx, 216(%rdi)
  18690. sbbq 224(%rsi), %rdx
  18691. movq 232(%rdi), %rcx
  18692. movq %rdx, 224(%rdi)
  18693. sbbq 232(%rsi), %rcx
  18694. movq 240(%rdi), %rdx
  18695. movq %rcx, 232(%rdi)
  18696. sbbq 240(%rsi), %rdx
  18697. movq 248(%rdi), %rcx
  18698. movq %rdx, 240(%rdi)
  18699. sbbq 248(%rsi), %rcx
  18700. movq 256(%rdi), %rdx
  18701. movq %rcx, 248(%rdi)
  18702. sbbq 256(%rsi), %rdx
  18703. movq 264(%rdi), %rcx
  18704. movq %rdx, 256(%rdi)
  18705. sbbq 264(%rsi), %rcx
  18706. movq 272(%rdi), %rdx
  18707. movq %rcx, 264(%rdi)
  18708. sbbq 272(%rsi), %rdx
  18709. movq 280(%rdi), %rcx
  18710. movq %rdx, 272(%rdi)
  18711. sbbq 280(%rsi), %rcx
  18712. movq 288(%rdi), %rdx
  18713. movq %rcx, 280(%rdi)
  18714. sbbq 288(%rsi), %rdx
  18715. movq 296(%rdi), %rcx
  18716. movq %rdx, 288(%rdi)
  18717. sbbq 296(%rsi), %rcx
  18718. movq 304(%rdi), %rdx
  18719. movq %rcx, 296(%rdi)
  18720. sbbq 304(%rsi), %rdx
  18721. movq 312(%rdi), %rcx
  18722. movq %rdx, 304(%rdi)
  18723. sbbq 312(%rsi), %rcx
  18724. movq 320(%rdi), %rdx
  18725. movq %rcx, 312(%rdi)
  18726. sbbq 320(%rsi), %rdx
  18727. movq 328(%rdi), %rcx
  18728. movq %rdx, 320(%rdi)
  18729. sbbq 328(%rsi), %rcx
  18730. movq 336(%rdi), %rdx
  18731. movq %rcx, 328(%rdi)
  18732. sbbq 336(%rsi), %rdx
  18733. movq 344(%rdi), %rcx
  18734. movq %rdx, 336(%rdi)
  18735. sbbq 344(%rsi), %rcx
  18736. movq 352(%rdi), %rdx
  18737. movq %rcx, 344(%rdi)
  18738. sbbq 352(%rsi), %rdx
  18739. movq 360(%rdi), %rcx
  18740. movq %rdx, 352(%rdi)
  18741. sbbq 360(%rsi), %rcx
  18742. movq 368(%rdi), %rdx
  18743. movq %rcx, 360(%rdi)
  18744. sbbq 368(%rsi), %rdx
  18745. movq 376(%rdi), %rcx
  18746. movq %rdx, 368(%rdi)
  18747. sbbq 376(%rsi), %rcx
  18748. movq %rcx, 376(%rdi)
  # Fold the final borrow into %rax: 0 - 0 - CF leaves 0 or -1.
  18749. sbbq $0x00, %rax
  # Note(review): repz retq is a ret carrying a rep prefix; presumably the
  # generator picked this return encoding on purpose - confirm before changing.
  18750. repz retq
  18751. #ifndef __APPLE__
  18752. .size sp_3072_sub_in_place_48,.-sp_3072_sub_in_place_48
  18753. #endif /* __APPLE__ */
  18754. /* Add b to a into r. (r = a + b)
  18755. *
  18756. * r A single precision integer.
  18757. * a A single precision integer.
  18758. * b A single precision integer.
  *
  * All three operands are handled as 48 words of 64 bits (byte offsets
  * 0..376). The carry chain starts at offset 0 and runs upward, so offset 0
  * is the least significant word. r is addressed through %rdi, a through
  * %rsi and b through %rdx.
  *
  * On return %rax holds the carry out of the top word (0 or 1). %rcx, %r8
  * and the flags are overwritten; no stack is used.
  *
  * The symbol is emitted as sp_3072_add_48 (with .type/.size) unless
  * __APPLE__ is defined, in which case it is _sp_3072_add_48 placed in
  * __TEXT,__text.
  18759. */
  18760. #ifndef __APPLE__
  18761. .text
  18762. .globl sp_3072_add_48
  18763. .type sp_3072_add_48,@function
  18764. .align 16
  18765. sp_3072_add_48:
  18766. #else
  18767. .section __TEXT,__text
  18768. .globl _sp_3072_add_48
  18769. .p2align 4
  18770. _sp_3072_add_48:
  18771. #endif /* __APPLE__ */
  18772. # Add
  # Word 0: a plain addq starts the carry chain. The xorq that clears %rax
  # sits before the addq, so the flags it writes are replaced before any
  # adcq reads them.
  18773. movq (%rsi), %rcx
  18774. xorq %rax, %rax
  18775. addq (%rdx), %rcx
  # Words 1..47: load the next word of a, store the previous sum into r,
  # then adcq the matching word of b. %rcx and %r8 alternate as the working
  # register. The movq loads and stores do not modify flags, so the carry
  # is passed intact from one adcq to the next.
  18776. movq 8(%rsi), %r8
  18777. movq %rcx, (%rdi)
  18778. adcq 8(%rdx), %r8
  18779. movq 16(%rsi), %rcx
  18780. movq %r8, 8(%rdi)
  18781. adcq 16(%rdx), %rcx
  18782. movq 24(%rsi), %r8
  18783. movq %rcx, 16(%rdi)
  18784. adcq 24(%rdx), %r8
  18785. movq 32(%rsi), %rcx
  18786. movq %r8, 24(%rdi)
  18787. adcq 32(%rdx), %rcx
  18788. movq 40(%rsi), %r8
  18789. movq %rcx, 32(%rdi)
  18790. adcq 40(%rdx), %r8
  18791. movq 48(%rsi), %rcx
  18792. movq %r8, 40(%rdi)
  18793. adcq 48(%rdx), %rcx
  18794. movq 56(%rsi), %r8
  18795. movq %rcx, 48(%rdi)
  18796. adcq 56(%rdx), %r8
  18797. movq 64(%rsi), %rcx
  18798. movq %r8, 56(%rdi)
  18799. adcq 64(%rdx), %rcx
  18800. movq 72(%rsi), %r8
  18801. movq %rcx, 64(%rdi)
  18802. adcq 72(%rdx), %r8
  18803. movq 80(%rsi), %rcx
  18804. movq %r8, 72(%rdi)
  18805. adcq 80(%rdx), %rcx
  18806. movq 88(%rsi), %r8
  18807. movq %rcx, 80(%rdi)
  18808. adcq 88(%rdx), %r8
  18809. movq 96(%rsi), %rcx
  18810. movq %r8, 88(%rdi)
  18811. adcq 96(%rdx), %rcx
  18812. movq 104(%rsi), %r8
  18813. movq %rcx, 96(%rdi)
  18814. adcq 104(%rdx), %r8
  18815. movq 112(%rsi), %rcx
  18816. movq %r8, 104(%rdi)
  18817. adcq 112(%rdx), %rcx
  18818. movq 120(%rsi), %r8
  18819. movq %rcx, 112(%rdi)
  18820. adcq 120(%rdx), %r8
  18821. movq 128(%rsi), %rcx
  18822. movq %r8, 120(%rdi)
  18823. adcq 128(%rdx), %rcx
  18824. movq 136(%rsi), %r8
  18825. movq %rcx, 128(%rdi)
  18826. adcq 136(%rdx), %r8
  18827. movq 144(%rsi), %rcx
  18828. movq %r8, 136(%rdi)
  18829. adcq 144(%rdx), %rcx
  18830. movq 152(%rsi), %r8
  18831. movq %rcx, 144(%rdi)
  18832. adcq 152(%rdx), %r8
  18833. movq 160(%rsi), %rcx
  18834. movq %r8, 152(%rdi)
  18835. adcq 160(%rdx), %rcx
  18836. movq 168(%rsi), %r8
  18837. movq %rcx, 160(%rdi)
  18838. adcq 168(%rdx), %r8
  18839. movq 176(%rsi), %rcx
  18840. movq %r8, 168(%rdi)
  18841. adcq 176(%rdx), %rcx
  18842. movq 184(%rsi), %r8
  18843. movq %rcx, 176(%rdi)
  18844. adcq 184(%rdx), %r8
  18845. movq 192(%rsi), %rcx
  18846. movq %r8, 184(%rdi)
  18847. adcq 192(%rdx), %rcx
  18848. movq 200(%rsi), %r8
  18849. movq %rcx, 192(%rdi)
  18850. adcq 200(%rdx), %r8
  18851. movq 208(%rsi), %rcx
  18852. movq %r8, 200(%rdi)
  18853. adcq 208(%rdx), %rcx
  18854. movq 216(%rsi), %r8
  18855. movq %rcx, 208(%rdi)
  18856. adcq 216(%rdx), %r8
  18857. movq 224(%rsi), %rcx
  18858. movq %r8, 216(%rdi)
  18859. adcq 224(%rdx), %rcx
  18860. movq 232(%rsi), %r8
  18861. movq %rcx, 224(%rdi)
  18862. adcq 232(%rdx), %r8
  18863. movq 240(%rsi), %rcx
  18864. movq %r8, 232(%rdi)
  18865. adcq 240(%rdx), %rcx
  18866. movq 248(%rsi), %r8
  18867. movq %rcx, 240(%rdi)
  18868. adcq 248(%rdx), %r8
  18869. movq 256(%rsi), %rcx
  18870. movq %r8, 248(%rdi)
  18871. adcq 256(%rdx), %rcx
  18872. movq 264(%rsi), %r8
  18873. movq %rcx, 256(%rdi)
  18874. adcq 264(%rdx), %r8
  18875. movq 272(%rsi), %rcx
  18876. movq %r8, 264(%rdi)
  18877. adcq 272(%rdx), %rcx
  18878. movq 280(%rsi), %r8
  18879. movq %rcx, 272(%rdi)
  18880. adcq 280(%rdx), %r8
  18881. movq 288(%rsi), %rcx
  18882. movq %r8, 280(%rdi)
  18883. adcq 288(%rdx), %rcx
  18884. movq 296(%rsi), %r8
  18885. movq %rcx, 288(%rdi)
  18886. adcq 296(%rdx), %r8
  18887. movq 304(%rsi), %rcx
  18888. movq %r8, 296(%rdi)
  18889. adcq 304(%rdx), %rcx
  18890. movq 312(%rsi), %r8
  18891. movq %rcx, 304(%rdi)
  18892. adcq 312(%rdx), %r8
  18893. movq 320(%rsi), %rcx
  18894. movq %r8, 312(%rdi)
  18895. adcq 320(%rdx), %rcx
  18896. movq 328(%rsi), %r8
  18897. movq %rcx, 320(%rdi)
  18898. adcq 328(%rdx), %r8
  18899. movq 336(%rsi), %rcx
  18900. movq %r8, 328(%rdi)
  18901. adcq 336(%rdx), %rcx
  18902. movq 344(%rsi), %r8
  18903. movq %rcx, 336(%rdi)
  18904. adcq 344(%rdx), %r8
  18905. movq 352(%rsi), %rcx
  18906. movq %r8, 344(%rdi)
  18907. adcq 352(%rdx), %rcx
  18908. movq 360(%rsi), %r8
  18909. movq %rcx, 352(%rdi)
  18910. adcq 360(%rdx), %r8
  18911. movq 368(%rsi), %rcx
  18912. movq %r8, 360(%rdi)
  18913. adcq 368(%rdx), %rcx
  18914. movq 376(%rsi), %r8
  18915. movq %rcx, 368(%rdi)
  18916. adcq 376(%rdx), %r8
  18917. movq %r8, 376(%rdi)
  # Fold the final carry into %rax: 0 + 0 + CF leaves 0 or 1.
  18918. adcq $0x00, %rax
  # Note(review): repz retq is a ret carrying a rep prefix; presumably the
  # generator picked this return encoding on purpose - confirm before changing.
  18919. repz retq
  18920. #ifndef __APPLE__
  18921. .size sp_3072_add_48,.-sp_3072_add_48
  18922. #endif /* __APPLE__ */
  18923. /* Multiply a and b into r. (r = a * b)
  18924. *
  18925. * r A single precision integer.
  18926. * a A single precision integer.
  18927. * b A single precision integer.
  18928. */
  18929. #ifndef __APPLE__
  18930. .text
  18931. .globl sp_3072_mul_48
  18932. .type sp_3072_mul_48,@function
  18933. .align 16
  18934. sp_3072_mul_48:
  18935. #else
  18936. .section __TEXT,__text
  18937. .globl _sp_3072_mul_48
  18938. .p2align 4
  18939. _sp_3072_mul_48:
  18940. #endif /* __APPLE__ */
  18941. pushq %r12
  18942. pushq %r13
  18943. pushq %r14
  18944. pushq %r15
  18945. subq $0x4a8, %rsp
  18946. movq %rdi, 1152(%rsp)
  18947. movq %rsi, 1160(%rsp)
  18948. movq %rdx, 1168(%rsp)
  18949. leaq 768(%rsp), %r10
  18950. leaq 192(%rsi), %r12
  18951. # Add
  18952. movq (%rsi), %rax
  18953. xorq %r13, %r13
  18954. addq (%r12), %rax
  18955. movq 8(%rsi), %rcx
  18956. movq %rax, (%r10)
  18957. adcq 8(%r12), %rcx
  18958. movq 16(%rsi), %r8
  18959. movq %rcx, 8(%r10)
  18960. adcq 16(%r12), %r8
  18961. movq 24(%rsi), %rax
  18962. movq %r8, 16(%r10)
  18963. adcq 24(%r12), %rax
  18964. movq 32(%rsi), %rcx
  18965. movq %rax, 24(%r10)
  18966. adcq 32(%r12), %rcx
  18967. movq 40(%rsi), %r8
  18968. movq %rcx, 32(%r10)
  18969. adcq 40(%r12), %r8
  18970. movq 48(%rsi), %rax
  18971. movq %r8, 40(%r10)
  18972. adcq 48(%r12), %rax
  18973. movq 56(%rsi), %rcx
  18974. movq %rax, 48(%r10)
  18975. adcq 56(%r12), %rcx
  18976. movq 64(%rsi), %r8
  18977. movq %rcx, 56(%r10)
  18978. adcq 64(%r12), %r8
  18979. movq 72(%rsi), %rax
  18980. movq %r8, 64(%r10)
  18981. adcq 72(%r12), %rax
  18982. movq 80(%rsi), %rcx
  18983. movq %rax, 72(%r10)
  18984. adcq 80(%r12), %rcx
  18985. movq 88(%rsi), %r8
  18986. movq %rcx, 80(%r10)
  18987. adcq 88(%r12), %r8
  18988. movq 96(%rsi), %rax
  18989. movq %r8, 88(%r10)
  18990. adcq 96(%r12), %rax
  18991. movq 104(%rsi), %rcx
  18992. movq %rax, 96(%r10)
  18993. adcq 104(%r12), %rcx
  18994. movq 112(%rsi), %r8
  18995. movq %rcx, 104(%r10)
  18996. adcq 112(%r12), %r8
  18997. movq 120(%rsi), %rax
  18998. movq %r8, 112(%r10)
  18999. adcq 120(%r12), %rax
  19000. movq 128(%rsi), %rcx
  19001. movq %rax, 120(%r10)
  19002. adcq 128(%r12), %rcx
  19003. movq 136(%rsi), %r8
  19004. movq %rcx, 128(%r10)
  19005. adcq 136(%r12), %r8
  19006. movq 144(%rsi), %rax
  19007. movq %r8, 136(%r10)
  19008. adcq 144(%r12), %rax
  19009. movq 152(%rsi), %rcx
  19010. movq %rax, 144(%r10)
  19011. adcq 152(%r12), %rcx
  19012. movq 160(%rsi), %r8
  19013. movq %rcx, 152(%r10)
  19014. adcq 160(%r12), %r8
  19015. movq 168(%rsi), %rax
  19016. movq %r8, 160(%r10)
  19017. adcq 168(%r12), %rax
  19018. movq 176(%rsi), %rcx
  19019. movq %rax, 168(%r10)
  19020. adcq 176(%r12), %rcx
  19021. movq 184(%rsi), %r8
  19022. movq %rcx, 176(%r10)
  19023. adcq 184(%r12), %r8
  19024. movq %r8, 184(%r10)
  19025. adcq $0x00, %r13
  19026. movq %r13, 1176(%rsp)
  19027. leaq 960(%rsp), %r11
  19028. leaq 192(%rdx), %r12
  19029. # Add
  19030. movq (%rdx), %rax
  19031. xorq %r14, %r14
  19032. addq (%r12), %rax
  19033. movq 8(%rdx), %rcx
  19034. movq %rax, (%r11)
  19035. adcq 8(%r12), %rcx
  19036. movq 16(%rdx), %r8
  19037. movq %rcx, 8(%r11)
  19038. adcq 16(%r12), %r8
  19039. movq 24(%rdx), %rax
  19040. movq %r8, 16(%r11)
  19041. adcq 24(%r12), %rax
  19042. movq 32(%rdx), %rcx
  19043. movq %rax, 24(%r11)
  19044. adcq 32(%r12), %rcx
  19045. movq 40(%rdx), %r8
  19046. movq %rcx, 32(%r11)
  19047. adcq 40(%r12), %r8
  19048. movq 48(%rdx), %rax
  19049. movq %r8, 40(%r11)
  19050. adcq 48(%r12), %rax
  19051. movq 56(%rdx), %rcx
  19052. movq %rax, 48(%r11)
  19053. adcq 56(%r12), %rcx
  19054. movq 64(%rdx), %r8
  19055. movq %rcx, 56(%r11)
  19056. adcq 64(%r12), %r8
  19057. movq 72(%rdx), %rax
  19058. movq %r8, 64(%r11)
  19059. adcq 72(%r12), %rax
  19060. movq 80(%rdx), %rcx
  19061. movq %rax, 72(%r11)
  19062. adcq 80(%r12), %rcx
  19063. movq 88(%rdx), %r8
  19064. movq %rcx, 80(%r11)
  19065. adcq 88(%r12), %r8
  19066. movq 96(%rdx), %rax
  19067. movq %r8, 88(%r11)
  19068. adcq 96(%r12), %rax
  19069. movq 104(%rdx), %rcx
  19070. movq %rax, 96(%r11)
  19071. adcq 104(%r12), %rcx
  19072. movq 112(%rdx), %r8
  19073. movq %rcx, 104(%r11)
  19074. adcq 112(%r12), %r8
  19075. movq 120(%rdx), %rax
  19076. movq %r8, 112(%r11)
  19077. adcq 120(%r12), %rax
  19078. movq 128(%rdx), %rcx
  19079. movq %rax, 120(%r11)
  19080. adcq 128(%r12), %rcx
  19081. movq 136(%rdx), %r8
  19082. movq %rcx, 128(%r11)
  19083. adcq 136(%r12), %r8
  19084. movq 144(%rdx), %rax
  19085. movq %r8, 136(%r11)
  19086. adcq 144(%r12), %rax
  19087. movq 152(%rdx), %rcx
  19088. movq %rax, 144(%r11)
  19089. adcq 152(%r12), %rcx
  19090. movq 160(%rdx), %r8
  19091. movq %rcx, 152(%r11)
  19092. adcq 160(%r12), %r8
  19093. movq 168(%rdx), %rax
  19094. movq %r8, 160(%r11)
  19095. adcq 168(%r12), %rax
  19096. movq 176(%rdx), %rcx
  19097. movq %rax, 168(%r11)
  19098. adcq 176(%r12), %rcx
  19099. movq 184(%rdx), %r8
  19100. movq %rcx, 176(%r11)
  19101. adcq 184(%r12), %r8
  19102. movq %r8, 184(%r11)
  19103. adcq $0x00, %r14
  19104. movq %r14, 1184(%rsp)
  19105. movq %r11, %rdx
  19106. movq %r10, %rsi
  19107. movq %rsp, %rdi
  19108. #ifndef __APPLE__
  19109. callq sp_3072_mul_24@plt
  19110. #else
  19111. callq _sp_3072_mul_24
  19112. #endif /* __APPLE__ */
  19113. movq 1168(%rsp), %rdx
  19114. movq 1160(%rsp), %rsi
  19115. leaq 384(%rsp), %rdi
  19116. addq $0xc0, %rdx
  19117. addq $0xc0, %rsi
  19118. #ifndef __APPLE__
  19119. callq sp_3072_mul_24@plt
  19120. #else
  19121. callq _sp_3072_mul_24
  19122. #endif /* __APPLE__ */
  19123. movq 1168(%rsp), %rdx
  19124. movq 1160(%rsp), %rsi
  19125. movq 1152(%rsp), %rdi
  19126. #ifndef __APPLE__
  19127. callq sp_3072_mul_24@plt
  19128. #else
  19129. callq _sp_3072_mul_24
  19130. #endif /* __APPLE__ */
  19131. movq 1176(%rsp), %r13
  19132. movq 1184(%rsp), %r14
  19133. movq 1152(%rsp), %r15
  19134. movq %r13, %r9
  19135. leaq 768(%rsp), %r10
  19136. leaq 960(%rsp), %r11
  19137. andq %r14, %r9
  19138. negq %r13
  19139. negq %r14
  19140. addq $0x180, %r15
  19141. movq (%r10), %rax
  19142. movq (%r11), %rcx
  19143. andq %r14, %rax
  19144. andq %r13, %rcx
  19145. movq %rax, (%r10)
  19146. movq %rcx, (%r11)
  19147. movq 8(%r10), %rax
  19148. movq 8(%r11), %rcx
  19149. andq %r14, %rax
  19150. andq %r13, %rcx
  19151. movq %rax, 8(%r10)
  19152. movq %rcx, 8(%r11)
  19153. movq 16(%r10), %rax
  19154. movq 16(%r11), %rcx
  19155. andq %r14, %rax
  19156. andq %r13, %rcx
  19157. movq %rax, 16(%r10)
  19158. movq %rcx, 16(%r11)
  19159. movq 24(%r10), %rax
  19160. movq 24(%r11), %rcx
  19161. andq %r14, %rax
  19162. andq %r13, %rcx
  19163. movq %rax, 24(%r10)
  19164. movq %rcx, 24(%r11)
  19165. movq 32(%r10), %rax
  19166. movq 32(%r11), %rcx
  19167. andq %r14, %rax
  19168. andq %r13, %rcx
  19169. movq %rax, 32(%r10)
  19170. movq %rcx, 32(%r11)
  19171. movq 40(%r10), %rax
  19172. movq 40(%r11), %rcx
  19173. andq %r14, %rax
  19174. andq %r13, %rcx
  19175. movq %rax, 40(%r10)
  19176. movq %rcx, 40(%r11)
  19177. movq 48(%r10), %rax
  19178. movq 48(%r11), %rcx
  19179. andq %r14, %rax
  19180. andq %r13, %rcx
  19181. movq %rax, 48(%r10)
  19182. movq %rcx, 48(%r11)
  19183. movq 56(%r10), %rax
  19184. movq 56(%r11), %rcx
  19185. andq %r14, %rax
  19186. andq %r13, %rcx
  19187. movq %rax, 56(%r10)
  19188. movq %rcx, 56(%r11)
  19189. movq 64(%r10), %rax
  19190. movq 64(%r11), %rcx
  19191. andq %r14, %rax
  19192. andq %r13, %rcx
  19193. movq %rax, 64(%r10)
  19194. movq %rcx, 64(%r11)
  19195. movq 72(%r10), %rax
  19196. movq 72(%r11), %rcx
  19197. andq %r14, %rax
  19198. andq %r13, %rcx
  19199. movq %rax, 72(%r10)
  19200. movq %rcx, 72(%r11)
  19201. movq 80(%r10), %rax
  19202. movq 80(%r11), %rcx
  19203. andq %r14, %rax
  19204. andq %r13, %rcx
  19205. movq %rax, 80(%r10)
  19206. movq %rcx, 80(%r11)
  19207. movq 88(%r10), %rax
  19208. movq 88(%r11), %rcx
  19209. andq %r14, %rax
  19210. andq %r13, %rcx
  19211. movq %rax, 88(%r10)
  19212. movq %rcx, 88(%r11)
  19213. movq 96(%r10), %rax
  19214. movq 96(%r11), %rcx
  19215. andq %r14, %rax
  19216. andq %r13, %rcx
  19217. movq %rax, 96(%r10)
  19218. movq %rcx, 96(%r11)
  19219. movq 104(%r10), %rax
  19220. movq 104(%r11), %rcx
  19221. andq %r14, %rax
  19222. andq %r13, %rcx
  19223. movq %rax, 104(%r10)
  19224. movq %rcx, 104(%r11)
  19225. movq 112(%r10), %rax
  19226. movq 112(%r11), %rcx
  19227. andq %r14, %rax
  19228. andq %r13, %rcx
  19229. movq %rax, 112(%r10)
  19230. movq %rcx, 112(%r11)
  19231. movq 120(%r10), %rax
  19232. movq 120(%r11), %rcx
  19233. andq %r14, %rax
  19234. andq %r13, %rcx
  19235. movq %rax, 120(%r10)
  19236. movq %rcx, 120(%r11)
  19237. movq 128(%r10), %rax
  19238. movq 128(%r11), %rcx
  19239. andq %r14, %rax
  19240. andq %r13, %rcx
  19241. movq %rax, 128(%r10)
  19242. movq %rcx, 128(%r11)
  19243. movq 136(%r10), %rax
  19244. movq 136(%r11), %rcx
  19245. andq %r14, %rax
  19246. andq %r13, %rcx
  19247. movq %rax, 136(%r10)
  19248. movq %rcx, 136(%r11)
  19249. movq 144(%r10), %rax
  19250. movq 144(%r11), %rcx
  19251. andq %r14, %rax
  19252. andq %r13, %rcx
  19253. movq %rax, 144(%r10)
  19254. movq %rcx, 144(%r11)
  19255. movq 152(%r10), %rax
  19256. movq 152(%r11), %rcx
  19257. andq %r14, %rax
  19258. andq %r13, %rcx
  19259. movq %rax, 152(%r10)
  19260. movq %rcx, 152(%r11)
  19261. movq 160(%r10), %rax
  19262. movq 160(%r11), %rcx
  19263. andq %r14, %rax
  19264. andq %r13, %rcx
  19265. movq %rax, 160(%r10)
  19266. movq %rcx, 160(%r11)
  19267. movq 168(%r10), %rax
  19268. movq 168(%r11), %rcx
  19269. andq %r14, %rax
  19270. andq %r13, %rcx
  19271. movq %rax, 168(%r10)
  19272. movq %rcx, 168(%r11)
  19273. movq 176(%r10), %rax
  19274. movq 176(%r11), %rcx
  19275. andq %r14, %rax
  19276. andq %r13, %rcx
  19277. movq %rax, 176(%r10)
  19278. movq %rcx, 176(%r11)
  19279. movq 184(%r10), %rax
  19280. movq 184(%r11), %rcx
  19281. andq %r14, %rax
  19282. andq %r13, %rcx
  19283. movq %rax, 184(%r10)
  19284. movq %rcx, 184(%r11)
  19285. movq (%r10), %rax
  19286. addq (%r11), %rax
  19287. movq 8(%r10), %rcx
  19288. movq %rax, (%r15)
  19289. adcq 8(%r11), %rcx
  19290. movq 16(%r10), %r8
  19291. movq %rcx, 8(%r15)
  19292. adcq 16(%r11), %r8
  19293. movq 24(%r10), %rax
  19294. movq %r8, 16(%r15)
  19295. adcq 24(%r11), %rax
  19296. movq 32(%r10), %rcx
  19297. movq %rax, 24(%r15)
  19298. adcq 32(%r11), %rcx
  19299. movq 40(%r10), %r8
  19300. movq %rcx, 32(%r15)
  19301. adcq 40(%r11), %r8
  19302. movq 48(%r10), %rax
  19303. movq %r8, 40(%r15)
  19304. adcq 48(%r11), %rax
  19305. movq 56(%r10), %rcx
  19306. movq %rax, 48(%r15)
  19307. adcq 56(%r11), %rcx
  19308. movq 64(%r10), %r8
  19309. movq %rcx, 56(%r15)
  19310. adcq 64(%r11), %r8
  19311. movq 72(%r10), %rax
  19312. movq %r8, 64(%r15)
  19313. adcq 72(%r11), %rax
  19314. movq 80(%r10), %rcx
  19315. movq %rax, 72(%r15)
  19316. adcq 80(%r11), %rcx
  19317. movq 88(%r10), %r8
  19318. movq %rcx, 80(%r15)
  19319. adcq 88(%r11), %r8
  19320. movq 96(%r10), %rax
  19321. movq %r8, 88(%r15)
  19322. adcq 96(%r11), %rax
  19323. movq 104(%r10), %rcx
  19324. movq %rax, 96(%r15)
  19325. adcq 104(%r11), %rcx
  19326. movq 112(%r10), %r8
  19327. movq %rcx, 104(%r15)
  19328. adcq 112(%r11), %r8
  19329. movq 120(%r10), %rax
  19330. movq %r8, 112(%r15)
  19331. adcq 120(%r11), %rax
  19332. movq 128(%r10), %rcx
  19333. movq %rax, 120(%r15)
  19334. adcq 128(%r11), %rcx
  19335. movq 136(%r10), %r8
  19336. movq %rcx, 128(%r15)
  19337. adcq 136(%r11), %r8
  19338. movq 144(%r10), %rax
  19339. movq %r8, 136(%r15)
  19340. adcq 144(%r11), %rax
  19341. movq 152(%r10), %rcx
  19342. movq %rax, 144(%r15)
  19343. adcq 152(%r11), %rcx
  19344. movq 160(%r10), %r8
  19345. movq %rcx, 152(%r15)
  19346. adcq 160(%r11), %r8
  19347. movq 168(%r10), %rax
  19348. movq %r8, 160(%r15)
  19349. adcq 168(%r11), %rax
  19350. movq 176(%r10), %rcx
  19351. movq %rax, 168(%r15)
  19352. adcq 176(%r11), %rcx
  19353. movq 184(%r10), %r8
  19354. movq %rcx, 176(%r15)
  19355. adcq 184(%r11), %r8
  19356. movq %r8, 184(%r15)
  19357. adcq $0x00, %r9
  19358. leaq 384(%rsp), %r11
  19359. movq %rsp, %r10
  19360. movq (%r10), %rax
  19361. subq (%r11), %rax
  19362. movq 8(%r10), %rcx
  19363. movq %rax, (%r10)
  19364. sbbq 8(%r11), %rcx
  19365. movq 16(%r10), %r8
  19366. movq %rcx, 8(%r10)
  19367. sbbq 16(%r11), %r8
  19368. movq 24(%r10), %rax
  19369. movq %r8, 16(%r10)
  19370. sbbq 24(%r11), %rax
  19371. movq 32(%r10), %rcx
  19372. movq %rax, 24(%r10)
  19373. sbbq 32(%r11), %rcx
  19374. movq 40(%r10), %r8
  19375. movq %rcx, 32(%r10)
  19376. sbbq 40(%r11), %r8
  19377. movq 48(%r10), %rax
  19378. movq %r8, 40(%r10)
  19379. sbbq 48(%r11), %rax
  19380. movq 56(%r10), %rcx
  19381. movq %rax, 48(%r10)
  19382. sbbq 56(%r11), %rcx
  19383. movq 64(%r10), %r8
  19384. movq %rcx, 56(%r10)
  19385. sbbq 64(%r11), %r8
  19386. movq 72(%r10), %rax
  19387. movq %r8, 64(%r10)
  19388. sbbq 72(%r11), %rax
  19389. movq 80(%r10), %rcx
  19390. movq %rax, 72(%r10)
  19391. sbbq 80(%r11), %rcx
  19392. movq 88(%r10), %r8
  19393. movq %rcx, 80(%r10)
  19394. sbbq 88(%r11), %r8
  19395. movq 96(%r10), %rax
  19396. movq %r8, 88(%r10)
  19397. sbbq 96(%r11), %rax
  19398. movq 104(%r10), %rcx
  19399. movq %rax, 96(%r10)
  19400. sbbq 104(%r11), %rcx
  19401. movq 112(%r10), %r8
  19402. movq %rcx, 104(%r10)
  19403. sbbq 112(%r11), %r8
  19404. movq 120(%r10), %rax
  19405. movq %r8, 112(%r10)
  19406. sbbq 120(%r11), %rax
  19407. movq 128(%r10), %rcx
  19408. movq %rax, 120(%r10)
  19409. sbbq 128(%r11), %rcx
  19410. movq 136(%r10), %r8
  19411. movq %rcx, 128(%r10)
  19412. sbbq 136(%r11), %r8
  19413. movq 144(%r10), %rax
  19414. movq %r8, 136(%r10)
  19415. sbbq 144(%r11), %rax
  19416. movq 152(%r10), %rcx
  19417. movq %rax, 144(%r10)
  19418. sbbq 152(%r11), %rcx
  19419. movq 160(%r10), %r8
  19420. movq %rcx, 152(%r10)
  19421. sbbq 160(%r11), %r8
  19422. movq 168(%r10), %rax
  19423. movq %r8, 160(%r10)
  19424. sbbq 168(%r11), %rax
  19425. movq 176(%r10), %rcx
  19426. movq %rax, 168(%r10)
  19427. sbbq 176(%r11), %rcx
  19428. movq 184(%r10), %r8
  19429. movq %rcx, 176(%r10)
  19430. sbbq 184(%r11), %r8
  19431. movq 192(%r10), %rax
  19432. movq %r8, 184(%r10)
  19433. sbbq 192(%r11), %rax
  19434. movq 200(%r10), %rcx
  19435. movq %rax, 192(%r10)
  19436. sbbq 200(%r11), %rcx
  19437. movq 208(%r10), %r8
  19438. movq %rcx, 200(%r10)
  19439. sbbq 208(%r11), %r8
  19440. movq 216(%r10), %rax
  19441. movq %r8, 208(%r10)
  19442. sbbq 216(%r11), %rax
  19443. movq 224(%r10), %rcx
  19444. movq %rax, 216(%r10)
  19445. sbbq 224(%r11), %rcx
  19446. movq 232(%r10), %r8
  19447. movq %rcx, 224(%r10)
  19448. sbbq 232(%r11), %r8
  19449. movq 240(%r10), %rax
  19450. movq %r8, 232(%r10)
  19451. sbbq 240(%r11), %rax
  19452. movq 248(%r10), %rcx
  19453. movq %rax, 240(%r10)
  19454. sbbq 248(%r11), %rcx
  19455. movq 256(%r10), %r8
  19456. movq %rcx, 248(%r10)
  19457. sbbq 256(%r11), %r8
  19458. movq 264(%r10), %rax
  19459. movq %r8, 256(%r10)
  19460. sbbq 264(%r11), %rax
  19461. movq 272(%r10), %rcx
  19462. movq %rax, 264(%r10)
  19463. sbbq 272(%r11), %rcx
  19464. movq 280(%r10), %r8
  19465. movq %rcx, 272(%r10)
  19466. sbbq 280(%r11), %r8
  19467. movq 288(%r10), %rax
  19468. movq %r8, 280(%r10)
  19469. sbbq 288(%r11), %rax
  19470. movq 296(%r10), %rcx
  19471. movq %rax, 288(%r10)
  19472. sbbq 296(%r11), %rcx
  19473. movq 304(%r10), %r8
  19474. movq %rcx, 296(%r10)
  19475. sbbq 304(%r11), %r8
  19476. movq 312(%r10), %rax
  19477. movq %r8, 304(%r10)
  19478. sbbq 312(%r11), %rax
  19479. movq 320(%r10), %rcx
  19480. movq %rax, 312(%r10)
  19481. sbbq 320(%r11), %rcx
  19482. movq 328(%r10), %r8
  19483. movq %rcx, 320(%r10)
  19484. sbbq 328(%r11), %r8
  19485. movq 336(%r10), %rax
  19486. movq %r8, 328(%r10)
  19487. sbbq 336(%r11), %rax
  19488. movq 344(%r10), %rcx
  19489. movq %rax, 336(%r10)
  19490. sbbq 344(%r11), %rcx
  19491. movq 352(%r10), %r8
  19492. movq %rcx, 344(%r10)
  19493. sbbq 352(%r11), %r8
  19494. movq 360(%r10), %rax
  19495. movq %r8, 352(%r10)
  19496. sbbq 360(%r11), %rax
  19497. movq 368(%r10), %rcx
  19498. movq %rax, 360(%r10)
  19499. sbbq 368(%r11), %rcx
  19500. movq 376(%r10), %r8
  19501. movq %rcx, 368(%r10)
  19502. sbbq 376(%r11), %r8
  19503. movq %r8, 376(%r10)
  19504. sbbq $0x00, %r9
  19505. movq (%r10), %rax
  19506. subq (%rdi), %rax
  19507. movq 8(%r10), %rcx
  19508. movq %rax, (%r10)
  19509. sbbq 8(%rdi), %rcx
  19510. movq 16(%r10), %r8
  19511. movq %rcx, 8(%r10)
  19512. sbbq 16(%rdi), %r8
  19513. movq 24(%r10), %rax
  19514. movq %r8, 16(%r10)
  19515. sbbq 24(%rdi), %rax
  19516. movq 32(%r10), %rcx
  19517. movq %rax, 24(%r10)
  19518. sbbq 32(%rdi), %rcx
  19519. movq 40(%r10), %r8
  19520. movq %rcx, 32(%r10)
  19521. sbbq 40(%rdi), %r8
  19522. movq 48(%r10), %rax
  19523. movq %r8, 40(%r10)
  19524. sbbq 48(%rdi), %rax
  19525. movq 56(%r10), %rcx
  19526. movq %rax, 48(%r10)
  19527. sbbq 56(%rdi), %rcx
  19528. movq 64(%r10), %r8
  19529. movq %rcx, 56(%r10)
  19530. sbbq 64(%rdi), %r8
  19531. movq 72(%r10), %rax
  19532. movq %r8, 64(%r10)
  19533. sbbq 72(%rdi), %rax
  19534. movq 80(%r10), %rcx
  19535. movq %rax, 72(%r10)
  19536. sbbq 80(%rdi), %rcx
  19537. movq 88(%r10), %r8
  19538. movq %rcx, 80(%r10)
  19539. sbbq 88(%rdi), %r8
  19540. movq 96(%r10), %rax
  19541. movq %r8, 88(%r10)
  19542. sbbq 96(%rdi), %rax
  19543. movq 104(%r10), %rcx
  19544. movq %rax, 96(%r10)
  19545. sbbq 104(%rdi), %rcx
  19546. movq 112(%r10), %r8
  19547. movq %rcx, 104(%r10)
  19548. sbbq 112(%rdi), %r8
  19549. movq 120(%r10), %rax
  19550. movq %r8, 112(%r10)
  19551. sbbq 120(%rdi), %rax
  19552. movq 128(%r10), %rcx
  19553. movq %rax, 120(%r10)
  19554. sbbq 128(%rdi), %rcx
  19555. movq 136(%r10), %r8
  19556. movq %rcx, 128(%r10)
  19557. sbbq 136(%rdi), %r8
  19558. movq 144(%r10), %rax
  19559. movq %r8, 136(%r10)
  19560. sbbq 144(%rdi), %rax
  19561. movq 152(%r10), %rcx
  19562. movq %rax, 144(%r10)
  19563. sbbq 152(%rdi), %rcx
  19564. movq 160(%r10), %r8
  19565. movq %rcx, 152(%r10)
  19566. sbbq 160(%rdi), %r8
  19567. movq 168(%r10), %rax
  19568. movq %r8, 160(%r10)
  19569. sbbq 168(%rdi), %rax
  19570. movq 176(%r10), %rcx
  19571. movq %rax, 168(%r10)
  19572. sbbq 176(%rdi), %rcx
  19573. movq 184(%r10), %r8
  19574. movq %rcx, 176(%r10)
  19575. sbbq 184(%rdi), %r8
  19576. movq 192(%r10), %rax
  19577. movq %r8, 184(%r10)
  19578. sbbq 192(%rdi), %rax
  19579. movq 200(%r10), %rcx
  19580. movq %rax, 192(%r10)
  19581. sbbq 200(%rdi), %rcx
  19582. movq 208(%r10), %r8
  19583. movq %rcx, 200(%r10)
  19584. sbbq 208(%rdi), %r8
  19585. movq 216(%r10), %rax
  19586. movq %r8, 208(%r10)
  19587. sbbq 216(%rdi), %rax
  19588. movq 224(%r10), %rcx
  19589. movq %rax, 216(%r10)
  19590. sbbq 224(%rdi), %rcx
  19591. movq 232(%r10), %r8
  19592. movq %rcx, 224(%r10)
  19593. sbbq 232(%rdi), %r8
  19594. movq 240(%r10), %rax
  19595. movq %r8, 232(%r10)
  19596. sbbq 240(%rdi), %rax
  19597. movq 248(%r10), %rcx
  19598. movq %rax, 240(%r10)
  19599. sbbq 248(%rdi), %rcx
  19600. movq 256(%r10), %r8
  19601. movq %rcx, 248(%r10)
  19602. sbbq 256(%rdi), %r8
  19603. movq 264(%r10), %rax
  19604. movq %r8, 256(%r10)
  19605. sbbq 264(%rdi), %rax
  19606. movq 272(%r10), %rcx
  19607. movq %rax, 264(%r10)
  19608. sbbq 272(%rdi), %rcx
  19609. movq 280(%r10), %r8
  19610. movq %rcx, 272(%r10)
  19611. sbbq 280(%rdi), %r8
  19612. movq 288(%r10), %rax
  19613. movq %r8, 280(%r10)
  19614. sbbq 288(%rdi), %rax
  19615. movq 296(%r10), %rcx
  19616. movq %rax, 288(%r10)
  19617. sbbq 296(%rdi), %rcx
  19618. movq 304(%r10), %r8
  19619. movq %rcx, 296(%r10)
  19620. sbbq 304(%rdi), %r8
  19621. movq 312(%r10), %rax
  19622. movq %r8, 304(%r10)
  19623. sbbq 312(%rdi), %rax
  19624. movq 320(%r10), %rcx
  19625. movq %rax, 312(%r10)
  19626. sbbq 320(%rdi), %rcx
  19627. movq 328(%r10), %r8
  19628. movq %rcx, 320(%r10)
  19629. sbbq 328(%rdi), %r8
  19630. movq 336(%r10), %rax
  19631. movq %r8, 328(%r10)
  19632. sbbq 336(%rdi), %rax
  19633. movq 344(%r10), %rcx
  19634. movq %rax, 336(%r10)
  19635. sbbq 344(%rdi), %rcx
  19636. movq 352(%r10), %r8
  19637. movq %rcx, 344(%r10)
  19638. sbbq 352(%rdi), %r8
  19639. movq 360(%r10), %rax
  19640. movq %r8, 352(%r10)
  19641. sbbq 360(%rdi), %rax
  19642. movq 368(%r10), %rcx
  19643. movq %rax, 360(%r10)
  19644. sbbq 368(%rdi), %rcx
  19645. movq 376(%r10), %r8
  19646. movq %rcx, 368(%r10)
  19647. sbbq 376(%rdi), %r8
  19648. movq %r8, 376(%r10)
  19649. sbbq $0x00, %r9
  19650. subq $0xc0, %r15
  19651. # Add
  19652. movq (%r15), %rax
  19653. addq (%r10), %rax
  19654. movq 8(%r15), %rcx
  19655. movq %rax, (%r15)
  19656. adcq 8(%r10), %rcx
  19657. movq 16(%r15), %r8
  19658. movq %rcx, 8(%r15)
  19659. adcq 16(%r10), %r8
  19660. movq 24(%r15), %rax
  19661. movq %r8, 16(%r15)
  19662. adcq 24(%r10), %rax
  19663. movq 32(%r15), %rcx
  19664. movq %rax, 24(%r15)
  19665. adcq 32(%r10), %rcx
  19666. movq 40(%r15), %r8
  19667. movq %rcx, 32(%r15)
  19668. adcq 40(%r10), %r8
  19669. movq 48(%r15), %rax
  19670. movq %r8, 40(%r15)
  19671. adcq 48(%r10), %rax
  19672. movq 56(%r15), %rcx
  19673. movq %rax, 48(%r15)
  19674. adcq 56(%r10), %rcx
  19675. movq 64(%r15), %r8
  19676. movq %rcx, 56(%r15)
  19677. adcq 64(%r10), %r8
  19678. movq 72(%r15), %rax
  19679. movq %r8, 64(%r15)
  19680. adcq 72(%r10), %rax
  19681. movq 80(%r15), %rcx
  19682. movq %rax, 72(%r15)
  19683. adcq 80(%r10), %rcx
  19684. movq 88(%r15), %r8
  19685. movq %rcx, 80(%r15)
  19686. adcq 88(%r10), %r8
  19687. movq 96(%r15), %rax
  19688. movq %r8, 88(%r15)
  19689. adcq 96(%r10), %rax
  19690. movq 104(%r15), %rcx
  19691. movq %rax, 96(%r15)
  19692. adcq 104(%r10), %rcx
  19693. movq 112(%r15), %r8
  19694. movq %rcx, 104(%r15)
  19695. adcq 112(%r10), %r8
  19696. movq 120(%r15), %rax
  19697. movq %r8, 112(%r15)
  19698. adcq 120(%r10), %rax
  19699. movq 128(%r15), %rcx
  19700. movq %rax, 120(%r15)
  19701. adcq 128(%r10), %rcx
  19702. movq 136(%r15), %r8
  19703. movq %rcx, 128(%r15)
  19704. adcq 136(%r10), %r8
  19705. movq 144(%r15), %rax
  19706. movq %r8, 136(%r15)
  19707. adcq 144(%r10), %rax
  19708. movq 152(%r15), %rcx
  19709. movq %rax, 144(%r15)
  19710. adcq 152(%r10), %rcx
  19711. movq 160(%r15), %r8
  19712. movq %rcx, 152(%r15)
  19713. adcq 160(%r10), %r8
  19714. movq 168(%r15), %rax
  19715. movq %r8, 160(%r15)
  19716. adcq 168(%r10), %rax
  19717. movq 176(%r15), %rcx
  19718. movq %rax, 168(%r15)
  19719. adcq 176(%r10), %rcx
  19720. movq 184(%r15), %r8
  19721. movq %rcx, 176(%r15)
  19722. adcq 184(%r10), %r8
  19723. movq 192(%r15), %rax
  19724. movq %r8, 184(%r15)
  19725. adcq 192(%r10), %rax
  19726. movq 200(%r15), %rcx
  19727. movq %rax, 192(%r15)
  19728. adcq 200(%r10), %rcx
  19729. movq 208(%r15), %r8
  19730. movq %rcx, 200(%r15)
  19731. adcq 208(%r10), %r8
  19732. movq 216(%r15), %rax
  19733. movq %r8, 208(%r15)
  19734. adcq 216(%r10), %rax
  19735. movq 224(%r15), %rcx
  19736. movq %rax, 216(%r15)
  19737. adcq 224(%r10), %rcx
  19738. movq 232(%r15), %r8
  19739. movq %rcx, 224(%r15)
  19740. adcq 232(%r10), %r8
  19741. movq 240(%r15), %rax
  19742. movq %r8, 232(%r15)
  19743. adcq 240(%r10), %rax
  19744. movq 248(%r15), %rcx
  19745. movq %rax, 240(%r15)
  19746. adcq 248(%r10), %rcx
  19747. movq 256(%r15), %r8
  19748. movq %rcx, 248(%r15)
  19749. adcq 256(%r10), %r8
  19750. movq 264(%r15), %rax
  19751. movq %r8, 256(%r15)
  19752. adcq 264(%r10), %rax
  19753. movq 272(%r15), %rcx
  19754. movq %rax, 264(%r15)
  19755. adcq 272(%r10), %rcx
  19756. movq 280(%r15), %r8
  19757. movq %rcx, 272(%r15)
  19758. adcq 280(%r10), %r8
  19759. movq 288(%r15), %rax
  19760. movq %r8, 280(%r15)
  19761. adcq 288(%r10), %rax
  19762. movq 296(%r15), %rcx
  19763. movq %rax, 288(%r15)
  19764. adcq 296(%r10), %rcx
  19765. movq 304(%r15), %r8
  19766. movq %rcx, 296(%r15)
  19767. adcq 304(%r10), %r8
  19768. movq 312(%r15), %rax
  19769. movq %r8, 304(%r15)
  19770. adcq 312(%r10), %rax
  19771. movq 320(%r15), %rcx
  19772. movq %rax, 312(%r15)
  19773. adcq 320(%r10), %rcx
  19774. movq 328(%r15), %r8
  19775. movq %rcx, 320(%r15)
  19776. adcq 328(%r10), %r8
  19777. movq 336(%r15), %rax
  19778. movq %r8, 328(%r15)
  19779. adcq 336(%r10), %rax
  19780. movq 344(%r15), %rcx
  19781. movq %rax, 336(%r15)
  19782. adcq 344(%r10), %rcx
  19783. movq 352(%r15), %r8
  19784. movq %rcx, 344(%r15)
  19785. adcq 352(%r10), %r8
  19786. movq 360(%r15), %rax
  19787. movq %r8, 352(%r15)
  19788. adcq 360(%r10), %rax
  19789. movq 368(%r15), %rcx
  19790. movq %rax, 360(%r15)
  19791. adcq 368(%r10), %rcx
  19792. movq 376(%r15), %r8
  19793. movq %rcx, 368(%r15)
  19794. adcq 376(%r10), %r8
  19795. movq %r8, 376(%r15)
  19796. adcq $0x00, %r9
  19797. movq %r9, 576(%rdi)
  19798. addq $0xc0, %r15
  19799. # Add
  19800. movq (%r15), %rax
  19801. addq (%r11), %rax
  19802. movq 8(%r15), %rcx
  19803. movq %rax, (%r15)
  19804. adcq 8(%r11), %rcx
  19805. movq 16(%r15), %r8
  19806. movq %rcx, 8(%r15)
  19807. adcq 16(%r11), %r8
  19808. movq 24(%r15), %rax
  19809. movq %r8, 16(%r15)
  19810. adcq 24(%r11), %rax
  19811. movq 32(%r15), %rcx
  19812. movq %rax, 24(%r15)
  19813. adcq 32(%r11), %rcx
  19814. movq 40(%r15), %r8
  19815. movq %rcx, 32(%r15)
  19816. adcq 40(%r11), %r8
  19817. movq 48(%r15), %rax
  19818. movq %r8, 40(%r15)
  19819. adcq 48(%r11), %rax
  19820. movq 56(%r15), %rcx
  19821. movq %rax, 48(%r15)
  19822. adcq 56(%r11), %rcx
  19823. movq 64(%r15), %r8
  19824. movq %rcx, 56(%r15)
  19825. adcq 64(%r11), %r8
  19826. movq 72(%r15), %rax
  19827. movq %r8, 64(%r15)
  19828. adcq 72(%r11), %rax
  19829. movq 80(%r15), %rcx
  19830. movq %rax, 72(%r15)
  19831. adcq 80(%r11), %rcx
  19832. movq 88(%r15), %r8
  19833. movq %rcx, 80(%r15)
  19834. adcq 88(%r11), %r8
  19835. movq 96(%r15), %rax
  19836. movq %r8, 88(%r15)
  19837. adcq 96(%r11), %rax
  19838. movq 104(%r15), %rcx
  19839. movq %rax, 96(%r15)
  19840. adcq 104(%r11), %rcx
  19841. movq 112(%r15), %r8
  19842. movq %rcx, 104(%r15)
  19843. adcq 112(%r11), %r8
  19844. movq 120(%r15), %rax
  19845. movq %r8, 112(%r15)
  19846. adcq 120(%r11), %rax
  19847. movq 128(%r15), %rcx
  19848. movq %rax, 120(%r15)
  19849. adcq 128(%r11), %rcx
  19850. movq 136(%r15), %r8
  19851. movq %rcx, 128(%r15)
  19852. adcq 136(%r11), %r8
  19853. movq 144(%r15), %rax
  19854. movq %r8, 136(%r15)
  19855. adcq 144(%r11), %rax
  19856. movq 152(%r15), %rcx
  19857. movq %rax, 144(%r15)
  19858. adcq 152(%r11), %rcx
  19859. movq 160(%r15), %r8
  19860. movq %rcx, 152(%r15)
  19861. adcq 160(%r11), %r8
  19862. movq 168(%r15), %rax
  19863. movq %r8, 160(%r15)
  19864. adcq 168(%r11), %rax
  19865. movq 176(%r15), %rcx
  19866. movq %rax, 168(%r15)
  19867. adcq 176(%r11), %rcx
  19868. movq 184(%r15), %r8
  19869. movq %rcx, 176(%r15)
  19870. adcq 184(%r11), %r8
  19871. movq 192(%r15), %rax
  19872. movq %r8, 184(%r15)
  19873. adcq 192(%r11), %rax
  19874. movq %rax, 192(%r15)
  19875. # Add to zero
  19876. movq 200(%r11), %rax
  19877. adcq $0x00, %rax
  19878. movq 208(%r11), %rcx
  19879. movq %rax, 200(%r15)
  19880. adcq $0x00, %rcx
  19881. movq 216(%r11), %r8
  19882. movq %rcx, 208(%r15)
  19883. adcq $0x00, %r8
  19884. movq 224(%r11), %rax
  19885. movq %r8, 216(%r15)
  19886. adcq $0x00, %rax
  19887. movq 232(%r11), %rcx
  19888. movq %rax, 224(%r15)
  19889. adcq $0x00, %rcx
  19890. movq 240(%r11), %r8
  19891. movq %rcx, 232(%r15)
  19892. adcq $0x00, %r8
  19893. movq 248(%r11), %rax
  19894. movq %r8, 240(%r15)
  19895. adcq $0x00, %rax
  19896. movq 256(%r11), %rcx
  19897. movq %rax, 248(%r15)
  19898. adcq $0x00, %rcx
  19899. movq 264(%r11), %r8
  19900. movq %rcx, 256(%r15)
  19901. adcq $0x00, %r8
  19902. movq 272(%r11), %rax
  19903. movq %r8, 264(%r15)
  19904. adcq $0x00, %rax
  19905. movq 280(%r11), %rcx
  19906. movq %rax, 272(%r15)
  19907. adcq $0x00, %rcx
  19908. movq 288(%r11), %r8
  19909. movq %rcx, 280(%r15)
  19910. adcq $0x00, %r8
  19911. movq 296(%r11), %rax
  19912. movq %r8, 288(%r15)
  19913. adcq $0x00, %rax
  19914. movq 304(%r11), %rcx
  19915. movq %rax, 296(%r15)
  19916. adcq $0x00, %rcx
  19917. movq 312(%r11), %r8
  19918. movq %rcx, 304(%r15)
  19919. adcq $0x00, %r8
  19920. movq 320(%r11), %rax
  19921. movq %r8, 312(%r15)
  19922. adcq $0x00, %rax
  19923. movq 328(%r11), %rcx
  19924. movq %rax, 320(%r15)
  19925. adcq $0x00, %rcx
  19926. movq 336(%r11), %r8
  19927. movq %rcx, 328(%r15)
  19928. adcq $0x00, %r8
  19929. movq 344(%r11), %rax
  19930. movq %r8, 336(%r15)
  19931. adcq $0x00, %rax
  19932. movq 352(%r11), %rcx
  19933. movq %rax, 344(%r15)
  19934. adcq $0x00, %rcx
  19935. movq 360(%r11), %r8
  19936. movq %rcx, 352(%r15)
  19937. adcq $0x00, %r8
  19938. movq 368(%r11), %rax
  19939. movq %r8, 360(%r15)
  19940. adcq $0x00, %rax
  19941. movq 376(%r11), %rcx
  19942. movq %rax, 368(%r15)
  19943. adcq $0x00, %rcx
  19944. movq %rcx, 376(%r15)
  19945. addq $0x4a8, %rsp
  19946. popq %r15
  19947. popq %r14
  19948. popq %r13
  19949. popq %r12
  19950. repz retq
  19951. #ifndef __APPLE__
  19952. .size sp_3072_mul_48,.-sp_3072_mul_48
  19953. #endif /* __APPLE__ */
  19954. /* Double a 24-word integer: r = a + a. Words are 64 bits, least significant first.
  19955. * The carry out of the top word is left in %rax (0 or 1); %rcx, %rdx and the flags are overwritten.
  19956. * r Destination, addressed through %rdi; the 24 words at byte offsets 0..184 are written.
  19957. * a Source, addressed through %rsi; the 24 words at byte offsets 0..184 are read.
  19958. */
  19959. #ifndef __APPLE__
  19960. .text
  19961. .globl sp_3072_dbl_24
  19962. .type sp_3072_dbl_24,@function
  19963. .align 16
  19964. sp_3072_dbl_24:
  19965. #else
  19966. .section __TEXT,__text
  19967. .globl _sp_3072_dbl_24
  19968. .p2align 4
  19969. _sp_3072_dbl_24:
  19970. #endif /* __APPLE__ */
  19971. movq (%rsi), %rdx /* load a[0] */
  19972. xorq %rax, %rax /* rax = 0, it receives the carry out at the end; the CF cleared here is replaced by the addq below */
  19973. addq %rdx, %rdx /* a[0] + a[0]: starts the carry chain by setting CF */
  19974. movq 8(%rsi), %rcx /* load a[1]; movq does not modify CF, so the chain stays intact */
  19975. movq %rdx, (%rdi) /* store r[0] */
  19976. adcq %rcx, %rcx /* a[1] + a[1] + CF */
  19977. movq 16(%rsi), %rdx /* same load-next / store-previous / adcq pattern for every remaining word, alternating %rdx and %rcx */
  19978. movq %rcx, 8(%rdi)
  19979. adcq %rdx, %rdx
  19980. movq 24(%rsi), %rcx
  19981. movq %rdx, 16(%rdi)
  19982. adcq %rcx, %rcx
  19983. movq 32(%rsi), %rdx
  19984. movq %rcx, 24(%rdi)
  19985. adcq %rdx, %rdx
  19986. movq 40(%rsi), %rcx
  19987. movq %rdx, 32(%rdi)
  19988. adcq %rcx, %rcx
  19989. movq 48(%rsi), %rdx
  19990. movq %rcx, 40(%rdi)
  19991. adcq %rdx, %rdx
  19992. movq 56(%rsi), %rcx
  19993. movq %rdx, 48(%rdi)
  19994. adcq %rcx, %rcx
  19995. movq 64(%rsi), %rdx
  19996. movq %rcx, 56(%rdi)
  19997. adcq %rdx, %rdx
  19998. movq 72(%rsi), %rcx
  19999. movq %rdx, 64(%rdi)
  20000. adcq %rcx, %rcx
  20001. movq 80(%rsi), %rdx
  20002. movq %rcx, 72(%rdi)
  20003. adcq %rdx, %rdx
  20004. movq 88(%rsi), %rcx
  20005. movq %rdx, 80(%rdi)
  20006. adcq %rcx, %rcx
  20007. movq 96(%rsi), %rdx
  20008. movq %rcx, 88(%rdi)
  20009. adcq %rdx, %rdx
  20010. movq 104(%rsi), %rcx
  20011. movq %rdx, 96(%rdi)
  20012. adcq %rcx, %rcx
  20013. movq 112(%rsi), %rdx
  20014. movq %rcx, 104(%rdi)
  20015. adcq %rdx, %rdx
  20016. movq 120(%rsi), %rcx
  20017. movq %rdx, 112(%rdi)
  20018. adcq %rcx, %rcx
  20019. movq 128(%rsi), %rdx
  20020. movq %rcx, 120(%rdi)
  20021. adcq %rdx, %rdx
  20022. movq 136(%rsi), %rcx
  20023. movq %rdx, 128(%rdi)
  20024. adcq %rcx, %rcx
  20025. movq 144(%rsi), %rdx
  20026. movq %rcx, 136(%rdi)
  20027. adcq %rdx, %rdx
  20028. movq 152(%rsi), %rcx
  20029. movq %rdx, 144(%rdi)
  20030. adcq %rcx, %rcx
  20031. movq 160(%rsi), %rdx
  20032. movq %rcx, 152(%rdi)
  20033. adcq %rdx, %rdx
  20034. movq 168(%rsi), %rcx
  20035. movq %rdx, 160(%rdi)
  20036. adcq %rcx, %rcx
  20037. movq 176(%rsi), %rdx
  20038. movq %rcx, 168(%rdi)
  20039. adcq %rdx, %rdx
  20040. movq 184(%rsi), %rcx /* load a[23], the top word */
  20041. movq %rdx, 176(%rdi)
  20042. adcq %rcx, %rcx /* a[23] + a[23] + CF; CF now holds the carry out of the whole sum */
  20043. movq %rcx, 184(%rdi) /* store r[23] */
  20044. adcq $0x00, %rax /* rax = 0 + 0 + CF: materialise the final carry */
  20045. repz retq /* return with the carry in %rax */
  20046. #ifndef __APPLE__
  20047. .size sp_3072_dbl_24,.-sp_3072_dbl_24
  20048. #endif /* __APPLE__ */
  20049. /* Square a and put result in r. (r = a * a)
  20050. *
  20051. * r A single precision integer.
  20052. * a A single precision integer.
  20053. */
  20054. #ifndef __APPLE__
  20055. .text
  20056. .globl sp_3072_sqr_48
  20057. .type sp_3072_sqr_48,@function
  20058. .align 16
  20059. sp_3072_sqr_48:
  20060. #else
  20061. .section __TEXT,__text
  20062. .globl _sp_3072_sqr_48
  20063. .p2align 4
  20064. _sp_3072_sqr_48:
  20065. #endif /* __APPLE__ */
  20066. subq $0x3d8, %rsp
  20067. movq %rdi, 960(%rsp)
  20068. movq %rsi, 968(%rsp)
  20069. leaq 768(%rsp), %r8
  20070. leaq 192(%rsi), %r9
  20071. # Add
  20072. movq (%rsi), %rdx
  20073. xorq %rcx, %rcx
  20074. addq (%r9), %rdx
  20075. movq 8(%rsi), %rax
  20076. movq %rdx, (%r8)
  20077. adcq 8(%r9), %rax
  20078. movq 16(%rsi), %rdx
  20079. movq %rax, 8(%r8)
  20080. adcq 16(%r9), %rdx
  20081. movq 24(%rsi), %rax
  20082. movq %rdx, 16(%r8)
  20083. adcq 24(%r9), %rax
  20084. movq 32(%rsi), %rdx
  20085. movq %rax, 24(%r8)
  20086. adcq 32(%r9), %rdx
  20087. movq 40(%rsi), %rax
  20088. movq %rdx, 32(%r8)
  20089. adcq 40(%r9), %rax
  20090. movq 48(%rsi), %rdx
  20091. movq %rax, 40(%r8)
  20092. adcq 48(%r9), %rdx
  20093. movq 56(%rsi), %rax
  20094. movq %rdx, 48(%r8)
  20095. adcq 56(%r9), %rax
  20096. movq 64(%rsi), %rdx
  20097. movq %rax, 56(%r8)
  20098. adcq 64(%r9), %rdx
  20099. movq 72(%rsi), %rax
  20100. movq %rdx, 64(%r8)
  20101. adcq 72(%r9), %rax
  20102. movq 80(%rsi), %rdx
  20103. movq %rax, 72(%r8)
  20104. adcq 80(%r9), %rdx
  20105. movq 88(%rsi), %rax
  20106. movq %rdx, 80(%r8)
  20107. adcq 88(%r9), %rax
  20108. movq 96(%rsi), %rdx
  20109. movq %rax, 88(%r8)
  20110. adcq 96(%r9), %rdx
  20111. movq 104(%rsi), %rax
  20112. movq %rdx, 96(%r8)
  20113. adcq 104(%r9), %rax
  20114. movq 112(%rsi), %rdx
  20115. movq %rax, 104(%r8)
  20116. adcq 112(%r9), %rdx
  20117. movq 120(%rsi), %rax
  20118. movq %rdx, 112(%r8)
  20119. adcq 120(%r9), %rax
  20120. movq 128(%rsi), %rdx
  20121. movq %rax, 120(%r8)
  20122. adcq 128(%r9), %rdx
  20123. movq 136(%rsi), %rax
  20124. movq %rdx, 128(%r8)
  20125. adcq 136(%r9), %rax
  20126. movq 144(%rsi), %rdx
  20127. movq %rax, 136(%r8)
  20128. adcq 144(%r9), %rdx
  20129. movq 152(%rsi), %rax
  20130. movq %rdx, 144(%r8)
  20131. adcq 152(%r9), %rax
  20132. movq 160(%rsi), %rdx
  20133. movq %rax, 152(%r8)
  20134. adcq 160(%r9), %rdx
  20135. movq 168(%rsi), %rax
  20136. movq %rdx, 160(%r8)
  20137. adcq 168(%r9), %rax
  20138. movq 176(%rsi), %rdx
  20139. movq %rax, 168(%r8)
  20140. adcq 176(%r9), %rdx
  20141. movq 184(%rsi), %rax
  20142. movq %rdx, 176(%r8)
  20143. adcq 184(%r9), %rax
  20144. movq %rax, 184(%r8)
  20145. adcq $0x00, %rcx
  20146. movq %rcx, 976(%rsp)
  20147. movq %r8, %rsi
  20148. movq %rsp, %rdi
  20149. #ifndef __APPLE__
  20150. callq sp_3072_sqr_24@plt
  20151. #else
  20152. callq _sp_3072_sqr_24
  20153. #endif /* __APPLE__ */
  20154. movq 968(%rsp), %rsi
  20155. leaq 384(%rsp), %rdi
  20156. addq $0xc0, %rsi
  20157. #ifndef __APPLE__
  20158. callq sp_3072_sqr_24@plt
  20159. #else
  20160. callq _sp_3072_sqr_24
  20161. #endif /* __APPLE__ */
  20162. movq 968(%rsp), %rsi
  20163. movq 960(%rsp), %rdi
  20164. #ifndef __APPLE__
  20165. callq sp_3072_sqr_24@plt
  20166. #else
  20167. callq _sp_3072_sqr_24
  20168. #endif /* __APPLE__ */
  20169. movq 976(%rsp), %r10
  20170. movq %rdi, %r9
  20171. leaq 768(%rsp), %r8
  20172. movq %r10, %rcx
  20173. negq %r10
  20174. addq $0x180, %r9
  20175. movq (%r8), %rdx
  20176. movq 8(%r8), %rax
  20177. andq %r10, %rdx
  20178. andq %r10, %rax
  20179. movq %rdx, (%r9)
  20180. movq %rax, 8(%r9)
  20181. movq 16(%r8), %rdx
  20182. movq 24(%r8), %rax
  20183. andq %r10, %rdx
  20184. andq %r10, %rax
  20185. movq %rdx, 16(%r9)
  20186. movq %rax, 24(%r9)
  20187. movq 32(%r8), %rdx
  20188. movq 40(%r8), %rax
  20189. andq %r10, %rdx
  20190. andq %r10, %rax
  20191. movq %rdx, 32(%r9)
  20192. movq %rax, 40(%r9)
  20193. movq 48(%r8), %rdx
  20194. movq 56(%r8), %rax
  20195. andq %r10, %rdx
  20196. andq %r10, %rax
  20197. movq %rdx, 48(%r9)
  20198. movq %rax, 56(%r9)
  20199. movq 64(%r8), %rdx
  20200. movq 72(%r8), %rax
  20201. andq %r10, %rdx
  20202. andq %r10, %rax
  20203. movq %rdx, 64(%r9)
  20204. movq %rax, 72(%r9)
  20205. movq 80(%r8), %rdx
  20206. movq 88(%r8), %rax
  20207. andq %r10, %rdx
  20208. andq %r10, %rax
  20209. movq %rdx, 80(%r9)
  20210. movq %rax, 88(%r9)
  20211. movq 96(%r8), %rdx
  20212. movq 104(%r8), %rax
  20213. andq %r10, %rdx
  20214. andq %r10, %rax
  20215. movq %rdx, 96(%r9)
  20216. movq %rax, 104(%r9)
  20217. movq 112(%r8), %rdx
  20218. movq 120(%r8), %rax
  20219. andq %r10, %rdx
  20220. andq %r10, %rax
  20221. movq %rdx, 112(%r9)
  20222. movq %rax, 120(%r9)
  20223. movq 128(%r8), %rdx
  20224. movq 136(%r8), %rax
  20225. andq %r10, %rdx
  20226. andq %r10, %rax
  20227. movq %rdx, 128(%r9)
  20228. movq %rax, 136(%r9)
  20229. movq 144(%r8), %rdx
  20230. movq 152(%r8), %rax
  20231. andq %r10, %rdx
  20232. andq %r10, %rax
  20233. movq %rdx, 144(%r9)
  20234. movq %rax, 152(%r9)
  20235. movq 160(%r8), %rdx
  20236. movq 168(%r8), %rax
  20237. andq %r10, %rdx
  20238. andq %r10, %rax
  20239. movq %rdx, 160(%r9)
  20240. movq %rax, 168(%r9)
  20241. movq 176(%r8), %rdx
  20242. movq 184(%r8), %rax
  20243. andq %r10, %rdx
  20244. andq %r10, %rax
  20245. movq %rdx, 176(%r9)
  20246. movq %rax, 184(%r9)
  20247. movq (%r9), %rdx
  20248. addq %rdx, %rdx
  20249. movq 8(%r9), %rax
  20250. movq %rdx, (%r9)
  20251. adcq %rax, %rax
  20252. movq 16(%r9), %rdx
  20253. movq %rax, 8(%r9)
  20254. adcq %rdx, %rdx
  20255. movq 24(%r9), %rax
  20256. movq %rdx, 16(%r9)
  20257. adcq %rax, %rax
  20258. movq 32(%r9), %rdx
  20259. movq %rax, 24(%r9)
  20260. adcq %rdx, %rdx
  20261. movq 40(%r9), %rax
  20262. movq %rdx, 32(%r9)
  20263. adcq %rax, %rax
  20264. movq 48(%r9), %rdx
  20265. movq %rax, 40(%r9)
  20266. adcq %rdx, %rdx
  20267. movq 56(%r9), %rax
  20268. movq %rdx, 48(%r9)
  20269. adcq %rax, %rax
  20270. movq 64(%r9), %rdx
  20271. movq %rax, 56(%r9)
  20272. adcq %rdx, %rdx
  20273. movq 72(%r9), %rax
  20274. movq %rdx, 64(%r9)
  20275. adcq %rax, %rax
  20276. movq 80(%r9), %rdx
  20277. movq %rax, 72(%r9)
  20278. adcq %rdx, %rdx
  20279. movq 88(%r9), %rax
  20280. movq %rdx, 80(%r9)
  20281. adcq %rax, %rax
  20282. movq 96(%r9), %rdx
  20283. movq %rax, 88(%r9)
  20284. adcq %rdx, %rdx
  20285. movq 104(%r9), %rax
  20286. movq %rdx, 96(%r9)
  20287. adcq %rax, %rax
  20288. movq 112(%r9), %rdx
  20289. movq %rax, 104(%r9)
  20290. adcq %rdx, %rdx
  20291. movq 120(%r9), %rax
  20292. movq %rdx, 112(%r9)
  20293. adcq %rax, %rax
  20294. movq 128(%r9), %rdx
  20295. movq %rax, 120(%r9)
  20296. adcq %rdx, %rdx
  20297. movq 136(%r9), %rax
  20298. movq %rdx, 128(%r9)
  20299. adcq %rax, %rax
  20300. movq 144(%r9), %rdx
  20301. movq %rax, 136(%r9)
  20302. adcq %rdx, %rdx
  20303. movq 152(%r9), %rax
  20304. movq %rdx, 144(%r9)
  20305. adcq %rax, %rax
  20306. movq 160(%r9), %rdx
  20307. movq %rax, 152(%r9)
  20308. adcq %rdx, %rdx
  20309. movq 168(%r9), %rax
  20310. movq %rdx, 160(%r9)
  20311. adcq %rax, %rax
  20312. movq 176(%r9), %rdx
  20313. movq %rax, 168(%r9)
  20314. adcq %rdx, %rdx
  20315. movq 184(%r9), %rax
  20316. movq %rdx, 176(%r9)
  20317. adcq %rax, %rax
  20318. movq %rax, 184(%r9)
  20319. adcq $0x00, %rcx
  20320. leaq 384(%rsp), %rsi
  20321. movq %rsp, %r8
  20322. movq (%r8), %rdx
  20323. subq (%rsi), %rdx
  20324. movq 8(%r8), %rax
  20325. movq %rdx, (%r8)
  20326. sbbq 8(%rsi), %rax
  20327. movq 16(%r8), %rdx
  20328. movq %rax, 8(%r8)
  20329. sbbq 16(%rsi), %rdx
  20330. movq 24(%r8), %rax
  20331. movq %rdx, 16(%r8)
  20332. sbbq 24(%rsi), %rax
  20333. movq 32(%r8), %rdx
  20334. movq %rax, 24(%r8)
  20335. sbbq 32(%rsi), %rdx
  20336. movq 40(%r8), %rax
  20337. movq %rdx, 32(%r8)
  20338. sbbq 40(%rsi), %rax
  20339. movq 48(%r8), %rdx
  20340. movq %rax, 40(%r8)
  20341. sbbq 48(%rsi), %rdx
  20342. movq 56(%r8), %rax
  20343. movq %rdx, 48(%r8)
  20344. sbbq 56(%rsi), %rax
  20345. movq 64(%r8), %rdx
  20346. movq %rax, 56(%r8)
  20347. sbbq 64(%rsi), %rdx
  20348. movq 72(%r8), %rax
  20349. movq %rdx, 64(%r8)
  20350. sbbq 72(%rsi), %rax
  20351. movq 80(%r8), %rdx
  20352. movq %rax, 72(%r8)
  20353. sbbq 80(%rsi), %rdx
  20354. movq 88(%r8), %rax
  20355. movq %rdx, 80(%r8)
  20356. sbbq 88(%rsi), %rax
  20357. movq 96(%r8), %rdx
  20358. movq %rax, 88(%r8)
  20359. sbbq 96(%rsi), %rdx
  20360. movq 104(%r8), %rax
  20361. movq %rdx, 96(%r8)
  20362. sbbq 104(%rsi), %rax
  20363. movq 112(%r8), %rdx
  20364. movq %rax, 104(%r8)
  20365. sbbq 112(%rsi), %rdx
  20366. movq 120(%r8), %rax
  20367. movq %rdx, 112(%r8)
  20368. sbbq 120(%rsi), %rax
  20369. movq 128(%r8), %rdx
  20370. movq %rax, 120(%r8)
  20371. sbbq 128(%rsi), %rdx
  20372. movq 136(%r8), %rax
  20373. movq %rdx, 128(%r8)
  20374. sbbq 136(%rsi), %rax
  20375. movq 144(%r8), %rdx
  20376. movq %rax, 136(%r8)
  20377. sbbq 144(%rsi), %rdx
  20378. movq 152(%r8), %rax
  20379. movq %rdx, 144(%r8)
  20380. sbbq 152(%rsi), %rax
  20381. movq 160(%r8), %rdx
  20382. movq %rax, 152(%r8)
  20383. sbbq 160(%rsi), %rdx
  20384. movq 168(%r8), %rax
  20385. movq %rdx, 160(%r8)
  20386. sbbq 168(%rsi), %rax
  20387. movq 176(%r8), %rdx
  20388. movq %rax, 168(%r8)
  20389. sbbq 176(%rsi), %rdx
  20390. movq 184(%r8), %rax
  20391. movq %rdx, 176(%r8)
  20392. sbbq 184(%rsi), %rax
  20393. movq 192(%r8), %rdx
  20394. movq %rax, 184(%r8)
  20395. sbbq 192(%rsi), %rdx
  20396. movq 200(%r8), %rax
  20397. movq %rdx, 192(%r8)
  20398. sbbq 200(%rsi), %rax
  20399. movq 208(%r8), %rdx
  20400. movq %rax, 200(%r8)
  20401. sbbq 208(%rsi), %rdx
  20402. movq 216(%r8), %rax
  20403. movq %rdx, 208(%r8)
  20404. sbbq 216(%rsi), %rax
  20405. movq 224(%r8), %rdx
  20406. movq %rax, 216(%r8)
  20407. sbbq 224(%rsi), %rdx
  20408. movq 232(%r8), %rax
  20409. movq %rdx, 224(%r8)
  20410. sbbq 232(%rsi), %rax
  20411. movq 240(%r8), %rdx
  20412. movq %rax, 232(%r8)
  20413. sbbq 240(%rsi), %rdx
  20414. movq 248(%r8), %rax
  20415. movq %rdx, 240(%r8)
  20416. sbbq 248(%rsi), %rax
  20417. movq 256(%r8), %rdx
  20418. movq %rax, 248(%r8)
  20419. sbbq 256(%rsi), %rdx
  20420. movq 264(%r8), %rax
  20421. movq %rdx, 256(%r8)
  20422. sbbq 264(%rsi), %rax
  20423. movq 272(%r8), %rdx
  20424. movq %rax, 264(%r8)
  20425. sbbq 272(%rsi), %rdx
  20426. movq 280(%r8), %rax
  20427. movq %rdx, 272(%r8)
  20428. sbbq 280(%rsi), %rax
  20429. movq 288(%r8), %rdx
  20430. movq %rax, 280(%r8)
  20431. sbbq 288(%rsi), %rdx
  20432. movq 296(%r8), %rax
  20433. movq %rdx, 288(%r8)
  20434. sbbq 296(%rsi), %rax
  20435. movq 304(%r8), %rdx
  20436. movq %rax, 296(%r8)
  20437. sbbq 304(%rsi), %rdx
  20438. movq 312(%r8), %rax
  20439. movq %rdx, 304(%r8)
  20440. sbbq 312(%rsi), %rax
  20441. movq 320(%r8), %rdx
  20442. movq %rax, 312(%r8)
  20443. sbbq 320(%rsi), %rdx
  20444. movq 328(%r8), %rax
  20445. movq %rdx, 320(%r8)
  20446. sbbq 328(%rsi), %rax
  20447. movq 336(%r8), %rdx
  20448. movq %rax, 328(%r8)
  20449. sbbq 336(%rsi), %rdx
  20450. movq 344(%r8), %rax
  20451. movq %rdx, 336(%r8)
  20452. sbbq 344(%rsi), %rax
  20453. movq 352(%r8), %rdx
  20454. movq %rax, 344(%r8)
  20455. sbbq 352(%rsi), %rdx
  20456. movq 360(%r8), %rax
  20457. movq %rdx, 352(%r8)
  20458. sbbq 360(%rsi), %rax
  20459. movq 368(%r8), %rdx
  20460. movq %rax, 360(%r8)
  20461. sbbq 368(%rsi), %rdx
  20462. movq 376(%r8), %rax
  20463. movq %rdx, 368(%r8)
  20464. sbbq 376(%rsi), %rax
  20465. movq %rax, 376(%r8)
  20466. sbbq $0x00, %rcx
  20467. movq (%r8), %rdx
  20468. subq (%rdi), %rdx
  20469. movq 8(%r8), %rax
  20470. movq %rdx, (%r8)
  20471. sbbq 8(%rdi), %rax
  20472. movq 16(%r8), %rdx
  20473. movq %rax, 8(%r8)
  20474. sbbq 16(%rdi), %rdx
  20475. movq 24(%r8), %rax
  20476. movq %rdx, 16(%r8)
  20477. sbbq 24(%rdi), %rax
  20478. movq 32(%r8), %rdx
  20479. movq %rax, 24(%r8)
  20480. sbbq 32(%rdi), %rdx
  20481. movq 40(%r8), %rax
  20482. movq %rdx, 32(%r8)
  20483. sbbq 40(%rdi), %rax
  20484. movq 48(%r8), %rdx
  20485. movq %rax, 40(%r8)
  20486. sbbq 48(%rdi), %rdx
  20487. movq 56(%r8), %rax
  20488. movq %rdx, 48(%r8)
  20489. sbbq 56(%rdi), %rax
  20490. movq 64(%r8), %rdx
  20491. movq %rax, 56(%r8)
  20492. sbbq 64(%rdi), %rdx
  20493. movq 72(%r8), %rax
  20494. movq %rdx, 64(%r8)
  20495. sbbq 72(%rdi), %rax
  20496. movq 80(%r8), %rdx
  20497. movq %rax, 72(%r8)
  20498. sbbq 80(%rdi), %rdx
  20499. movq 88(%r8), %rax
  20500. movq %rdx, 80(%r8)
  20501. sbbq 88(%rdi), %rax
  20502. movq 96(%r8), %rdx
  20503. movq %rax, 88(%r8)
  20504. sbbq 96(%rdi), %rdx
  20505. movq 104(%r8), %rax
  20506. movq %rdx, 96(%r8)
  20507. sbbq 104(%rdi), %rax
  20508. movq 112(%r8), %rdx
  20509. movq %rax, 104(%r8)
  20510. sbbq 112(%rdi), %rdx
  20511. movq 120(%r8), %rax
  20512. movq %rdx, 112(%r8)
  20513. sbbq 120(%rdi), %rax
  20514. movq 128(%r8), %rdx
  20515. movq %rax, 120(%r8)
  20516. sbbq 128(%rdi), %rdx
  20517. movq 136(%r8), %rax
  20518. movq %rdx, 128(%r8)
  20519. sbbq 136(%rdi), %rax
  20520. movq 144(%r8), %rdx
  20521. movq %rax, 136(%r8)
  20522. sbbq 144(%rdi), %rdx
  20523. movq 152(%r8), %rax
  20524. movq %rdx, 144(%r8)
  20525. sbbq 152(%rdi), %rax
  20526. movq 160(%r8), %rdx
  20527. movq %rax, 152(%r8)
  20528. sbbq 160(%rdi), %rdx
  20529. movq 168(%r8), %rax
  20530. movq %rdx, 160(%r8)
  20531. sbbq 168(%rdi), %rax
  20532. movq 176(%r8), %rdx
  20533. movq %rax, 168(%r8)
  20534. sbbq 176(%rdi), %rdx
  20535. movq 184(%r8), %rax
  20536. movq %rdx, 176(%r8)
  20537. sbbq 184(%rdi), %rax
  20538. movq 192(%r8), %rdx
  20539. movq %rax, 184(%r8)
  20540. sbbq 192(%rdi), %rdx
  20541. movq 200(%r8), %rax
  20542. movq %rdx, 192(%r8)
  20543. sbbq 200(%rdi), %rax
  20544. movq 208(%r8), %rdx
  20545. movq %rax, 200(%r8)
  20546. sbbq 208(%rdi), %rdx
  20547. movq 216(%r8), %rax
  20548. movq %rdx, 208(%r8)
  20549. sbbq 216(%rdi), %rax
  20550. movq 224(%r8), %rdx
  20551. movq %rax, 216(%r8)
  20552. sbbq 224(%rdi), %rdx
  20553. movq 232(%r8), %rax
  20554. movq %rdx, 224(%r8)
  20555. sbbq 232(%rdi), %rax
  20556. movq 240(%r8), %rdx
  20557. movq %rax, 232(%r8)
  20558. sbbq 240(%rdi), %rdx
  20559. movq 248(%r8), %rax
  20560. movq %rdx, 240(%r8)
  20561. sbbq 248(%rdi), %rax
  20562. movq 256(%r8), %rdx
  20563. movq %rax, 248(%r8)
  20564. sbbq 256(%rdi), %rdx
  20565. movq 264(%r8), %rax
  20566. movq %rdx, 256(%r8)
  20567. sbbq 264(%rdi), %rax
  20568. movq 272(%r8), %rdx
  20569. movq %rax, 264(%r8)
  20570. sbbq 272(%rdi), %rdx
  20571. movq 280(%r8), %rax
  20572. movq %rdx, 272(%r8)
  20573. sbbq 280(%rdi), %rax
  20574. movq 288(%r8), %rdx
  20575. movq %rax, 280(%r8)
  20576. sbbq 288(%rdi), %rdx
  20577. movq 296(%r8), %rax
  20578. movq %rdx, 288(%r8)
  20579. sbbq 296(%rdi), %rax
  20580. movq 304(%r8), %rdx
  20581. movq %rax, 296(%r8)
  20582. sbbq 304(%rdi), %rdx
  20583. movq 312(%r8), %rax
  20584. movq %rdx, 304(%r8)
  20585. sbbq 312(%rdi), %rax
  20586. movq 320(%r8), %rdx
  20587. movq %rax, 312(%r8)
  20588. sbbq 320(%rdi), %rdx
  20589. movq 328(%r8), %rax
  20590. movq %rdx, 320(%r8)
  20591. sbbq 328(%rdi), %rax
  20592. movq 336(%r8), %rdx
  20593. movq %rax, 328(%r8)
  20594. sbbq 336(%rdi), %rdx
  20595. movq 344(%r8), %rax
  20596. movq %rdx, 336(%r8)
  20597. sbbq 344(%rdi), %rax
  20598. movq 352(%r8), %rdx
  20599. movq %rax, 344(%r8)
  20600. sbbq 352(%rdi), %rdx
  20601. movq 360(%r8), %rax
  20602. movq %rdx, 352(%r8)
  20603. sbbq 360(%rdi), %rax
  20604. movq 368(%r8), %rdx
  20605. movq %rax, 360(%r8)
  20606. sbbq 368(%rdi), %rdx
  20607. movq 376(%r8), %rax
  20608. movq %rdx, 368(%r8)
  20609. sbbq 376(%rdi), %rax
  20610. movq %rax, 376(%r8)
  20611. sbbq $0x00, %rcx
  20612. subq $0xc0, %r9
  20613. # Add in place
  20614. movq (%r9), %rdx
  20615. addq (%r8), %rdx
  20616. movq 8(%r9), %rax
  20617. movq %rdx, (%r9)
  20618. adcq 8(%r8), %rax
  20619. movq 16(%r9), %rdx
  20620. movq %rax, 8(%r9)
  20621. adcq 16(%r8), %rdx
  20622. movq 24(%r9), %rax
  20623. movq %rdx, 16(%r9)
  20624. adcq 24(%r8), %rax
  20625. movq 32(%r9), %rdx
  20626. movq %rax, 24(%r9)
  20627. adcq 32(%r8), %rdx
  20628. movq 40(%r9), %rax
  20629. movq %rdx, 32(%r9)
  20630. adcq 40(%r8), %rax
  20631. movq 48(%r9), %rdx
  20632. movq %rax, 40(%r9)
  20633. adcq 48(%r8), %rdx
  20634. movq 56(%r9), %rax
  20635. movq %rdx, 48(%r9)
  20636. adcq 56(%r8), %rax
  20637. movq 64(%r9), %rdx
  20638. movq %rax, 56(%r9)
  20639. adcq 64(%r8), %rdx
  20640. movq 72(%r9), %rax
  20641. movq %rdx, 64(%r9)
  20642. adcq 72(%r8), %rax
  20643. movq 80(%r9), %rdx
  20644. movq %rax, 72(%r9)
  20645. adcq 80(%r8), %rdx
  20646. movq 88(%r9), %rax
  20647. movq %rdx, 80(%r9)
  20648. adcq 88(%r8), %rax
  20649. movq 96(%r9), %rdx
  20650. movq %rax, 88(%r9)
  20651. adcq 96(%r8), %rdx
  20652. movq 104(%r9), %rax
  20653. movq %rdx, 96(%r9)
  20654. adcq 104(%r8), %rax
  20655. movq 112(%r9), %rdx
  20656. movq %rax, 104(%r9)
  20657. adcq 112(%r8), %rdx
  20658. movq 120(%r9), %rax
  20659. movq %rdx, 112(%r9)
  20660. adcq 120(%r8), %rax
  20661. movq 128(%r9), %rdx
  20662. movq %rax, 120(%r9)
  20663. adcq 128(%r8), %rdx
  20664. movq 136(%r9), %rax
  20665. movq %rdx, 128(%r9)
  20666. adcq 136(%r8), %rax
  20667. movq 144(%r9), %rdx
  20668. movq %rax, 136(%r9)
  20669. adcq 144(%r8), %rdx
  20670. movq 152(%r9), %rax
  20671. movq %rdx, 144(%r9)
  20672. adcq 152(%r8), %rax
  20673. movq 160(%r9), %rdx
  20674. movq %rax, 152(%r9)
  20675. adcq 160(%r8), %rdx
  20676. movq 168(%r9), %rax
  20677. movq %rdx, 160(%r9)
  20678. adcq 168(%r8), %rax
  20679. movq 176(%r9), %rdx
  20680. movq %rax, 168(%r9)
  20681. adcq 176(%r8), %rdx
  20682. movq 184(%r9), %rax
  20683. movq %rdx, 176(%r9)
  20684. adcq 184(%r8), %rax
  20685. movq 192(%r9), %rdx
  20686. movq %rax, 184(%r9)
  20687. adcq 192(%r8), %rdx
  20688. movq 200(%r9), %rax
  20689. movq %rdx, 192(%r9)
  20690. adcq 200(%r8), %rax
  20691. movq 208(%r9), %rdx
  20692. movq %rax, 200(%r9)
  20693. adcq 208(%r8), %rdx
  20694. movq 216(%r9), %rax
  20695. movq %rdx, 208(%r9)
  20696. adcq 216(%r8), %rax
  20697. movq 224(%r9), %rdx
  20698. movq %rax, 216(%r9)
  20699. adcq 224(%r8), %rdx
  20700. movq 232(%r9), %rax
  20701. movq %rdx, 224(%r9)
  20702. adcq 232(%r8), %rax
  20703. movq 240(%r9), %rdx
  20704. movq %rax, 232(%r9)
  20705. adcq 240(%r8), %rdx
  20706. movq 248(%r9), %rax
  20707. movq %rdx, 240(%r9)
  20708. adcq 248(%r8), %rax
  20709. movq 256(%r9), %rdx
  20710. movq %rax, 248(%r9)
  20711. adcq 256(%r8), %rdx
  20712. movq 264(%r9), %rax
  20713. movq %rdx, 256(%r9)
  20714. adcq 264(%r8), %rax
  20715. movq 272(%r9), %rdx
  20716. movq %rax, 264(%r9)
  20717. adcq 272(%r8), %rdx
  20718. movq 280(%r9), %rax
  20719. movq %rdx, 272(%r9)
  20720. adcq 280(%r8), %rax
  20721. movq 288(%r9), %rdx
  20722. movq %rax, 280(%r9)
  20723. adcq 288(%r8), %rdx
  20724. movq 296(%r9), %rax
  20725. movq %rdx, 288(%r9)
  20726. adcq 296(%r8), %rax
  20727. movq 304(%r9), %rdx
  20728. movq %rax, 296(%r9)
  20729. adcq 304(%r8), %rdx
  20730. movq 312(%r9), %rax
  20731. movq %rdx, 304(%r9)
  20732. adcq 312(%r8), %rax
  20733. movq 320(%r9), %rdx
  20734. movq %rax, 312(%r9)
  20735. adcq 320(%r8), %rdx
  20736. movq 328(%r9), %rax
  20737. movq %rdx, 320(%r9)
  20738. adcq 328(%r8), %rax
  20739. movq 336(%r9), %rdx
  20740. movq %rax, 328(%r9)
  20741. adcq 336(%r8), %rdx
  20742. movq 344(%r9), %rax
  20743. movq %rdx, 336(%r9)
  20744. adcq 344(%r8), %rax
  20745. movq 352(%r9), %rdx
  20746. movq %rax, 344(%r9)
  20747. adcq 352(%r8), %rdx
  20748. movq 360(%r9), %rax
  20749. movq %rdx, 352(%r9)
  20750. adcq 360(%r8), %rax
  20751. movq 368(%r9), %rdx
  20752. movq %rax, 360(%r9)
  20753. adcq 368(%r8), %rdx
  20754. movq 376(%r9), %rax
  20755. movq %rdx, 368(%r9)
  20756. adcq 376(%r8), %rax
  20757. movq %rax, 376(%r9)
  20758. adcq $0x00, %rcx
  20759. movq %rcx, 576(%rdi)
  20760. # Add in place
  20761. movq 192(%r9), %rdx
  20762. addq (%rsi), %rdx
  20763. movq 200(%r9), %rax
  20764. movq %rdx, 192(%r9)
  20765. adcq 8(%rsi), %rax
  20766. movq 208(%r9), %rdx
  20767. movq %rax, 200(%r9)
  20768. adcq 16(%rsi), %rdx
  20769. movq 216(%r9), %rax
  20770. movq %rdx, 208(%r9)
  20771. adcq 24(%rsi), %rax
  20772. movq 224(%r9), %rdx
  20773. movq %rax, 216(%r9)
  20774. adcq 32(%rsi), %rdx
  20775. movq 232(%r9), %rax
  20776. movq %rdx, 224(%r9)
  20777. adcq 40(%rsi), %rax
  20778. movq 240(%r9), %rdx
  20779. movq %rax, 232(%r9)
  20780. adcq 48(%rsi), %rdx
  20781. movq 248(%r9), %rax
  20782. movq %rdx, 240(%r9)
  20783. adcq 56(%rsi), %rax
  20784. movq 256(%r9), %rdx
  20785. movq %rax, 248(%r9)
  20786. adcq 64(%rsi), %rdx
  20787. movq 264(%r9), %rax
  20788. movq %rdx, 256(%r9)
  20789. adcq 72(%rsi), %rax
  20790. movq 272(%r9), %rdx
  20791. movq %rax, 264(%r9)
  20792. adcq 80(%rsi), %rdx
  20793. movq 280(%r9), %rax
  20794. movq %rdx, 272(%r9)
  20795. adcq 88(%rsi), %rax
  20796. movq 288(%r9), %rdx
  20797. movq %rax, 280(%r9)
  20798. adcq 96(%rsi), %rdx
  20799. movq 296(%r9), %rax
  20800. movq %rdx, 288(%r9)
  20801. adcq 104(%rsi), %rax
  20802. movq 304(%r9), %rdx
  20803. movq %rax, 296(%r9)
  20804. adcq 112(%rsi), %rdx
  20805. movq 312(%r9), %rax
  20806. movq %rdx, 304(%r9)
  20807. adcq 120(%rsi), %rax
  20808. movq 320(%r9), %rdx
  20809. movq %rax, 312(%r9)
  20810. adcq 128(%rsi), %rdx
  20811. movq 328(%r9), %rax
  20812. movq %rdx, 320(%r9)
  20813. adcq 136(%rsi), %rax
  20814. movq 336(%r9), %rdx
  20815. movq %rax, 328(%r9)
  20816. adcq 144(%rsi), %rdx
  20817. movq 344(%r9), %rax
  20818. movq %rdx, 336(%r9)
  20819. adcq 152(%rsi), %rax
  20820. movq 352(%r9), %rdx
  20821. movq %rax, 344(%r9)
  20822. adcq 160(%rsi), %rdx
  20823. movq 360(%r9), %rax
  20824. movq %rdx, 352(%r9)
  20825. adcq 168(%rsi), %rax
  20826. movq 368(%r9), %rdx
  20827. movq %rax, 360(%r9)
  20828. adcq 176(%rsi), %rdx
  20829. movq 376(%r9), %rax
  20830. movq %rdx, 368(%r9)
  20831. adcq 184(%rsi), %rax
  20832. movq 384(%r9), %rdx
  20833. movq %rax, 376(%r9)
  20834. adcq 192(%rsi), %rdx
  20835. movq %rdx, 384(%r9)
  20836. # Add to zero
  20837. movq 200(%rsi), %rdx
  20838. adcq $0x00, %rdx
  20839. movq 208(%rsi), %rax
  20840. movq %rdx, 392(%r9)
  20841. adcq $0x00, %rax
  20842. movq 216(%rsi), %rdx
  20843. movq %rax, 400(%r9)
  20844. adcq $0x00, %rdx
  20845. movq 224(%rsi), %rax
  20846. movq %rdx, 408(%r9)
  20847. adcq $0x00, %rax
  20848. movq 232(%rsi), %rdx
  20849. movq %rax, 416(%r9)
  20850. adcq $0x00, %rdx
  20851. movq 240(%rsi), %rax
  20852. movq %rdx, 424(%r9)
  20853. adcq $0x00, %rax
  20854. movq 248(%rsi), %rdx
  20855. movq %rax, 432(%r9)
  20856. adcq $0x00, %rdx
  20857. movq 256(%rsi), %rax
  20858. movq %rdx, 440(%r9)
  20859. adcq $0x00, %rax
  20860. movq 264(%rsi), %rdx
  20861. movq %rax, 448(%r9)
  20862. adcq $0x00, %rdx
  20863. movq 272(%rsi), %rax
  20864. movq %rdx, 456(%r9)
  20865. adcq $0x00, %rax
  20866. movq 280(%rsi), %rdx
  20867. movq %rax, 464(%r9)
  20868. adcq $0x00, %rdx
  20869. movq 288(%rsi), %rax
  20870. movq %rdx, 472(%r9)
  20871. adcq $0x00, %rax
  20872. movq 296(%rsi), %rdx
  20873. movq %rax, 480(%r9)
  20874. adcq $0x00, %rdx
  20875. movq 304(%rsi), %rax
  20876. movq %rdx, 488(%r9)
  20877. adcq $0x00, %rax
  20878. movq 312(%rsi), %rdx
  20879. movq %rax, 496(%r9)
  20880. adcq $0x00, %rdx
  20881. movq 320(%rsi), %rax
  20882. movq %rdx, 504(%r9)
  20883. adcq $0x00, %rax
  20884. movq 328(%rsi), %rdx
  20885. movq %rax, 512(%r9)
  20886. adcq $0x00, %rdx
  20887. movq 336(%rsi), %rax
  20888. movq %rdx, 520(%r9)
  20889. adcq $0x00, %rax
  20890. movq 344(%rsi), %rdx
  20891. movq %rax, 528(%r9)
  20892. adcq $0x00, %rdx
  20893. movq 352(%rsi), %rax
  20894. movq %rdx, 536(%r9)
  20895. adcq $0x00, %rax
  20896. movq 360(%rsi), %rdx
  20897. movq %rax, 544(%r9)
  20898. adcq $0x00, %rdx
  20899. movq 368(%rsi), %rax
  20900. movq %rdx, 552(%r9)
  20901. adcq $0x00, %rax
  20902. movq 376(%rsi), %rdx
  20903. movq %rax, 560(%r9)
  20904. adcq $0x00, %rdx
  20905. movq %rdx, 568(%r9)
  20906. addq $0x3d8, %rsp
  20907. repz retq
  20908. #ifndef __APPLE__
  20909. .size sp_3072_sqr_48,.-sp_3072_sqr_48
  20910. #endif /* __APPLE__ */
  20911. /* Multiply a and b into r. (r = a * b)
  20912. *
  20913. * r Result of the multiplication, a single precision integer.
  20914. * a First operand, a single precision integer.
  20915. * b Second operand, a single precision integer.
  20916. */
  20917. #ifndef __APPLE__
  20918. .text
  20919. .globl sp_3072_mul_avx2_48
  20920. .type sp_3072_mul_avx2_48,@function
  20921. .align 16
  20922. sp_3072_mul_avx2_48:
  20923. #else
  20924. .section __TEXT,__text
  20925. .globl _sp_3072_mul_avx2_48
  20926. .p2align 4
  20927. _sp_3072_mul_avx2_48:
  20928. #endif /* __APPLE__ */
  20929. pushq %r12
  20930. pushq %r13
  20931. pushq %r14
  20932. pushq %r15
  20933. subq $0x4a8, %rsp
  20934. movq %rdi, 1152(%rsp)
  20935. movq %rsi, 1160(%rsp)
  20936. movq %rdx, 1168(%rsp)
  20937. leaq 768(%rsp), %r10
  20938. leaq 192(%rsi), %r12
  20939. # Add
  20940. movq (%rsi), %rax
  20941. xorq %r13, %r13
  20942. addq (%r12), %rax
  20943. movq 8(%rsi), %rcx
  20944. movq %rax, (%r10)
  20945. adcq 8(%r12), %rcx
  20946. movq 16(%rsi), %r8
  20947. movq %rcx, 8(%r10)
  20948. adcq 16(%r12), %r8
  20949. movq 24(%rsi), %rax
  20950. movq %r8, 16(%r10)
  20951. adcq 24(%r12), %rax
  20952. movq 32(%rsi), %rcx
  20953. movq %rax, 24(%r10)
  20954. adcq 32(%r12), %rcx
  20955. movq 40(%rsi), %r8
  20956. movq %rcx, 32(%r10)
  20957. adcq 40(%r12), %r8
  20958. movq 48(%rsi), %rax
  20959. movq %r8, 40(%r10)
  20960. adcq 48(%r12), %rax
  20961. movq 56(%rsi), %rcx
  20962. movq %rax, 48(%r10)
  20963. adcq 56(%r12), %rcx
  20964. movq 64(%rsi), %r8
  20965. movq %rcx, 56(%r10)
  20966. adcq 64(%r12), %r8
  20967. movq 72(%rsi), %rax
  20968. movq %r8, 64(%r10)
  20969. adcq 72(%r12), %rax
  20970. movq 80(%rsi), %rcx
  20971. movq %rax, 72(%r10)
  20972. adcq 80(%r12), %rcx
  20973. movq 88(%rsi), %r8
  20974. movq %rcx, 80(%r10)
  20975. adcq 88(%r12), %r8
  20976. movq 96(%rsi), %rax
  20977. movq %r8, 88(%r10)
  20978. adcq 96(%r12), %rax
  20979. movq 104(%rsi), %rcx
  20980. movq %rax, 96(%r10)
  20981. adcq 104(%r12), %rcx
  20982. movq 112(%rsi), %r8
  20983. movq %rcx, 104(%r10)
  20984. adcq 112(%r12), %r8
  20985. movq 120(%rsi), %rax
  20986. movq %r8, 112(%r10)
  20987. adcq 120(%r12), %rax
  20988. movq 128(%rsi), %rcx
  20989. movq %rax, 120(%r10)
  20990. adcq 128(%r12), %rcx
  20991. movq 136(%rsi), %r8
  20992. movq %rcx, 128(%r10)
  20993. adcq 136(%r12), %r8
  20994. movq 144(%rsi), %rax
  20995. movq %r8, 136(%r10)
  20996. adcq 144(%r12), %rax
  20997. movq 152(%rsi), %rcx
  20998. movq %rax, 144(%r10)
  20999. adcq 152(%r12), %rcx
  21000. movq 160(%rsi), %r8
  21001. movq %rcx, 152(%r10)
  21002. adcq 160(%r12), %r8
  21003. movq 168(%rsi), %rax
  21004. movq %r8, 160(%r10)
  21005. adcq 168(%r12), %rax
  21006. movq 176(%rsi), %rcx
  21007. movq %rax, 168(%r10)
  21008. adcq 176(%r12), %rcx
  21009. movq 184(%rsi), %r8
  21010. movq %rcx, 176(%r10)
  21011. adcq 184(%r12), %r8
  21012. movq %r8, 184(%r10)
  21013. adcq $0x00, %r13
  21014. movq %r13, 1176(%rsp)
  21015. leaq 960(%rsp), %r11
  21016. leaq 192(%rdx), %r12
  21017. # Add
  21018. movq (%rdx), %rax
  21019. xorq %r14, %r14
  21020. addq (%r12), %rax
  21021. movq 8(%rdx), %rcx
  21022. movq %rax, (%r11)
  21023. adcq 8(%r12), %rcx
  21024. movq 16(%rdx), %r8
  21025. movq %rcx, 8(%r11)
  21026. adcq 16(%r12), %r8
  21027. movq 24(%rdx), %rax
  21028. movq %r8, 16(%r11)
  21029. adcq 24(%r12), %rax
  21030. movq 32(%rdx), %rcx
  21031. movq %rax, 24(%r11)
  21032. adcq 32(%r12), %rcx
  21033. movq 40(%rdx), %r8
  21034. movq %rcx, 32(%r11)
  21035. adcq 40(%r12), %r8
  21036. movq 48(%rdx), %rax
  21037. movq %r8, 40(%r11)
  21038. adcq 48(%r12), %rax
  21039. movq 56(%rdx), %rcx
  21040. movq %rax, 48(%r11)
  21041. adcq 56(%r12), %rcx
  21042. movq 64(%rdx), %r8
  21043. movq %rcx, 56(%r11)
  21044. adcq 64(%r12), %r8
  21045. movq 72(%rdx), %rax
  21046. movq %r8, 64(%r11)
  21047. adcq 72(%r12), %rax
  21048. movq 80(%rdx), %rcx
  21049. movq %rax, 72(%r11)
  21050. adcq 80(%r12), %rcx
  21051. movq 88(%rdx), %r8
  21052. movq %rcx, 80(%r11)
  21053. adcq 88(%r12), %r8
  21054. movq 96(%rdx), %rax
  21055. movq %r8, 88(%r11)
  21056. adcq 96(%r12), %rax
  21057. movq 104(%rdx), %rcx
  21058. movq %rax, 96(%r11)
  21059. adcq 104(%r12), %rcx
  21060. movq 112(%rdx), %r8
  21061. movq %rcx, 104(%r11)
  21062. adcq 112(%r12), %r8
  21063. movq 120(%rdx), %rax
  21064. movq %r8, 112(%r11)
  21065. adcq 120(%r12), %rax
  21066. movq 128(%rdx), %rcx
  21067. movq %rax, 120(%r11)
  21068. adcq 128(%r12), %rcx
  21069. movq 136(%rdx), %r8
  21070. movq %rcx, 128(%r11)
  21071. adcq 136(%r12), %r8
  21072. movq 144(%rdx), %rax
  21073. movq %r8, 136(%r11)
  21074. adcq 144(%r12), %rax
  21075. movq 152(%rdx), %rcx
  21076. movq %rax, 144(%r11)
  21077. adcq 152(%r12), %rcx
  21078. movq 160(%rdx), %r8
  21079. movq %rcx, 152(%r11)
  21080. adcq 160(%r12), %r8
  21081. movq 168(%rdx), %rax
  21082. movq %r8, 160(%r11)
  21083. adcq 168(%r12), %rax
  21084. movq 176(%rdx), %rcx
  21085. movq %rax, 168(%r11)
  21086. adcq 176(%r12), %rcx
  21087. movq 184(%rdx), %r8
  21088. movq %rcx, 176(%r11)
  21089. adcq 184(%r12), %r8
  21090. movq %r8, 184(%r11)
  21091. adcq $0x00, %r14
  21092. movq %r14, 1184(%rsp)
  21093. movq %r11, %rdx
  21094. movq %r10, %rsi
  21095. movq %rsp, %rdi
  21096. #ifndef __APPLE__
  21097. callq sp_3072_mul_avx2_24@plt
  21098. #else
  21099. callq _sp_3072_mul_avx2_24
  21100. #endif /* __APPLE__ */
  21101. movq 1168(%rsp), %rdx
  21102. movq 1160(%rsp), %rsi
  21103. leaq 384(%rsp), %rdi
  21104. addq $0xc0, %rdx
  21105. addq $0xc0, %rsi
  21106. #ifndef __APPLE__
  21107. callq sp_3072_mul_avx2_24@plt
  21108. #else
  21109. callq _sp_3072_mul_avx2_24
  21110. #endif /* __APPLE__ */
  21111. movq 1168(%rsp), %rdx
  21112. movq 1160(%rsp), %rsi
  21113. movq 1152(%rsp), %rdi
  21114. #ifndef __APPLE__
  21115. callq sp_3072_mul_avx2_24@plt
  21116. #else
  21117. callq _sp_3072_mul_avx2_24
  21118. #endif /* __APPLE__ */
  21119. movq 1176(%rsp), %r13
  21120. movq 1184(%rsp), %r14
  21121. movq 1152(%rsp), %r15
  21122. movq %r13, %r9
  21123. leaq 768(%rsp), %r10
  21124. leaq 960(%rsp), %r11
  21125. andq %r14, %r9
  21126. negq %r13
  21127. negq %r14
  21128. addq $0x180, %r15
  21129. movq (%r10), %rax
  21130. movq (%r11), %rcx
  21131. pextq %r14, %rax, %rax
  21132. pextq %r13, %rcx, %rcx
  21133. addq %rcx, %rax
  21134. movq 8(%r10), %rcx
  21135. movq 8(%r11), %r8
  21136. pextq %r14, %rcx, %rcx
  21137. pextq %r13, %r8, %r8
  21138. movq %rax, (%r15)
  21139. adcq %r8, %rcx
  21140. movq 16(%r10), %r8
  21141. movq 16(%r11), %rax
  21142. pextq %r14, %r8, %r8
  21143. pextq %r13, %rax, %rax
  21144. movq %rcx, 8(%r15)
  21145. adcq %rax, %r8
  21146. movq 24(%r10), %rax
  21147. movq 24(%r11), %rcx
  21148. pextq %r14, %rax, %rax
  21149. pextq %r13, %rcx, %rcx
  21150. movq %r8, 16(%r15)
  21151. adcq %rcx, %rax
  21152. movq 32(%r10), %rcx
  21153. movq 32(%r11), %r8
  21154. pextq %r14, %rcx, %rcx
  21155. pextq %r13, %r8, %r8
  21156. movq %rax, 24(%r15)
  21157. adcq %r8, %rcx
  21158. movq 40(%r10), %r8
  21159. movq 40(%r11), %rax
  21160. pextq %r14, %r8, %r8
  21161. pextq %r13, %rax, %rax
  21162. movq %rcx, 32(%r15)
  21163. adcq %rax, %r8
  21164. movq 48(%r10), %rax
  21165. movq 48(%r11), %rcx
  21166. pextq %r14, %rax, %rax
  21167. pextq %r13, %rcx, %rcx
  21168. movq %r8, 40(%r15)
  21169. adcq %rcx, %rax
  21170. movq 56(%r10), %rcx
  21171. movq 56(%r11), %r8
  21172. pextq %r14, %rcx, %rcx
  21173. pextq %r13, %r8, %r8
  21174. movq %rax, 48(%r15)
  21175. adcq %r8, %rcx
  21176. movq 64(%r10), %r8
  21177. movq 64(%r11), %rax
  21178. pextq %r14, %r8, %r8
  21179. pextq %r13, %rax, %rax
  21180. movq %rcx, 56(%r15)
  21181. adcq %rax, %r8
  21182. movq 72(%r10), %rax
  21183. movq 72(%r11), %rcx
  21184. pextq %r14, %rax, %rax
  21185. pextq %r13, %rcx, %rcx
  21186. movq %r8, 64(%r15)
  21187. adcq %rcx, %rax
  21188. movq 80(%r10), %rcx
  21189. movq 80(%r11), %r8
  21190. pextq %r14, %rcx, %rcx
  21191. pextq %r13, %r8, %r8
  21192. movq %rax, 72(%r15)
  21193. adcq %r8, %rcx
  21194. movq 88(%r10), %r8
  21195. movq 88(%r11), %rax
  21196. pextq %r14, %r8, %r8
  21197. pextq %r13, %rax, %rax
  21198. movq %rcx, 80(%r15)
  21199. adcq %rax, %r8
  21200. movq 96(%r10), %rax
  21201. movq 96(%r11), %rcx
  21202. pextq %r14, %rax, %rax
  21203. pextq %r13, %rcx, %rcx
  21204. movq %r8, 88(%r15)
  21205. adcq %rcx, %rax
  21206. movq 104(%r10), %rcx
  21207. movq 104(%r11), %r8
  21208. pextq %r14, %rcx, %rcx
  21209. pextq %r13, %r8, %r8
  21210. movq %rax, 96(%r15)
  21211. adcq %r8, %rcx
  21212. movq 112(%r10), %r8
  21213. movq 112(%r11), %rax
  21214. pextq %r14, %r8, %r8
  21215. pextq %r13, %rax, %rax
  21216. movq %rcx, 104(%r15)
  21217. adcq %rax, %r8
  21218. movq 120(%r10), %rax
  21219. movq 120(%r11), %rcx
  21220. pextq %r14, %rax, %rax
  21221. pextq %r13, %rcx, %rcx
  21222. movq %r8, 112(%r15)
  21223. adcq %rcx, %rax
  21224. movq 128(%r10), %rcx
  21225. movq 128(%r11), %r8
  21226. pextq %r14, %rcx, %rcx
  21227. pextq %r13, %r8, %r8
  21228. movq %rax, 120(%r15)
  21229. adcq %r8, %rcx
  21230. movq 136(%r10), %r8
  21231. movq 136(%r11), %rax
  21232. pextq %r14, %r8, %r8
  21233. pextq %r13, %rax, %rax
  21234. movq %rcx, 128(%r15)
  21235. adcq %rax, %r8
  21236. movq 144(%r10), %rax
  21237. movq 144(%r11), %rcx
  21238. pextq %r14, %rax, %rax
  21239. pextq %r13, %rcx, %rcx
  21240. movq %r8, 136(%r15)
  21241. adcq %rcx, %rax
  21242. movq 152(%r10), %rcx
  21243. movq 152(%r11), %r8
  21244. pextq %r14, %rcx, %rcx
  21245. pextq %r13, %r8, %r8
  21246. movq %rax, 144(%r15)
  21247. adcq %r8, %rcx
  21248. movq 160(%r10), %r8
  21249. movq 160(%r11), %rax
  21250. pextq %r14, %r8, %r8
  21251. pextq %r13, %rax, %rax
  21252. movq %rcx, 152(%r15)
  21253. adcq %rax, %r8
  21254. movq 168(%r10), %rax
  21255. movq 168(%r11), %rcx
  21256. pextq %r14, %rax, %rax
  21257. pextq %r13, %rcx, %rcx
  21258. movq %r8, 160(%r15)
  21259. adcq %rcx, %rax
  21260. movq 176(%r10), %rcx
  21261. movq 176(%r11), %r8
  21262. pextq %r14, %rcx, %rcx
  21263. pextq %r13, %r8, %r8
  21264. movq %rax, 168(%r15)
  21265. adcq %r8, %rcx
  21266. movq 184(%r10), %r8
  21267. movq 184(%r11), %rax
  21268. pextq %r14, %r8, %r8
  21269. pextq %r13, %rax, %rax
  21270. movq %rcx, 176(%r15)
  21271. adcq %rax, %r8
  21272. movq %r8, 184(%r15)
  21273. adcq $0x00, %r9
  21274. leaq 384(%rsp), %r11
  21275. movq %rsp, %r10
  21276. movq (%r10), %rax
  21277. subq (%r11), %rax
  21278. movq 8(%r10), %rcx
  21279. movq %rax, (%r10)
  21280. sbbq 8(%r11), %rcx
  21281. movq 16(%r10), %r8
  21282. movq %rcx, 8(%r10)
  21283. sbbq 16(%r11), %r8
  21284. movq 24(%r10), %rax
  21285. movq %r8, 16(%r10)
  21286. sbbq 24(%r11), %rax
  21287. movq 32(%r10), %rcx
  21288. movq %rax, 24(%r10)
  21289. sbbq 32(%r11), %rcx
  21290. movq 40(%r10), %r8
  21291. movq %rcx, 32(%r10)
  21292. sbbq 40(%r11), %r8
  21293. movq 48(%r10), %rax
  21294. movq %r8, 40(%r10)
  21295. sbbq 48(%r11), %rax
  21296. movq 56(%r10), %rcx
  21297. movq %rax, 48(%r10)
  21298. sbbq 56(%r11), %rcx
  21299. movq 64(%r10), %r8
  21300. movq %rcx, 56(%r10)
  21301. sbbq 64(%r11), %r8
  21302. movq 72(%r10), %rax
  21303. movq %r8, 64(%r10)
  21304. sbbq 72(%r11), %rax
  21305. movq 80(%r10), %rcx
  21306. movq %rax, 72(%r10)
  21307. sbbq 80(%r11), %rcx
  21308. movq 88(%r10), %r8
  21309. movq %rcx, 80(%r10)
  21310. sbbq 88(%r11), %r8
  21311. movq 96(%r10), %rax
  21312. movq %r8, 88(%r10)
  21313. sbbq 96(%r11), %rax
  21314. movq 104(%r10), %rcx
  21315. movq %rax, 96(%r10)
  21316. sbbq 104(%r11), %rcx
  21317. movq 112(%r10), %r8
  21318. movq %rcx, 104(%r10)
  21319. sbbq 112(%r11), %r8
  21320. movq 120(%r10), %rax
  21321. movq %r8, 112(%r10)
  21322. sbbq 120(%r11), %rax
  21323. movq 128(%r10), %rcx
  21324. movq %rax, 120(%r10)
  21325. sbbq 128(%r11), %rcx
  21326. movq 136(%r10), %r8
  21327. movq %rcx, 128(%r10)
  21328. sbbq 136(%r11), %r8
  21329. movq 144(%r10), %rax
  21330. movq %r8, 136(%r10)
  21331. sbbq 144(%r11), %rax
  21332. movq 152(%r10), %rcx
  21333. movq %rax, 144(%r10)
  21334. sbbq 152(%r11), %rcx
  21335. movq 160(%r10), %r8
  21336. movq %rcx, 152(%r10)
  21337. sbbq 160(%r11), %r8
  21338. movq 168(%r10), %rax
  21339. movq %r8, 160(%r10)
  21340. sbbq 168(%r11), %rax
  21341. movq 176(%r10), %rcx
  21342. movq %rax, 168(%r10)
  21343. sbbq 176(%r11), %rcx
  21344. movq 184(%r10), %r8
  21345. movq %rcx, 176(%r10)
  21346. sbbq 184(%r11), %r8
  21347. movq 192(%r10), %rax
  21348. movq %r8, 184(%r10)
  21349. sbbq 192(%r11), %rax
  21350. movq 200(%r10), %rcx
  21351. movq %rax, 192(%r10)
  21352. sbbq 200(%r11), %rcx
  21353. movq 208(%r10), %r8
  21354. movq %rcx, 200(%r10)
  21355. sbbq 208(%r11), %r8
  21356. movq 216(%r10), %rax
  21357. movq %r8, 208(%r10)
  21358. sbbq 216(%r11), %rax
  21359. movq 224(%r10), %rcx
  21360. movq %rax, 216(%r10)
  21361. sbbq 224(%r11), %rcx
  21362. movq 232(%r10), %r8
  21363. movq %rcx, 224(%r10)
  21364. sbbq 232(%r11), %r8
  21365. movq 240(%r10), %rax
  21366. movq %r8, 232(%r10)
  21367. sbbq 240(%r11), %rax
  21368. movq 248(%r10), %rcx
  21369. movq %rax, 240(%r10)
  21370. sbbq 248(%r11), %rcx
  21371. movq 256(%r10), %r8
  21372. movq %rcx, 248(%r10)
  21373. sbbq 256(%r11), %r8
  21374. movq 264(%r10), %rax
  21375. movq %r8, 256(%r10)
  21376. sbbq 264(%r11), %rax
  21377. movq 272(%r10), %rcx
  21378. movq %rax, 264(%r10)
  21379. sbbq 272(%r11), %rcx
  21380. movq 280(%r10), %r8
  21381. movq %rcx, 272(%r10)
  21382. sbbq 280(%r11), %r8
  21383. movq 288(%r10), %rax
  21384. movq %r8, 280(%r10)
  21385. sbbq 288(%r11), %rax
  21386. movq 296(%r10), %rcx
  21387. movq %rax, 288(%r10)
  21388. sbbq 296(%r11), %rcx
  21389. movq 304(%r10), %r8
  21390. movq %rcx, 296(%r10)
  21391. sbbq 304(%r11), %r8
  21392. movq 312(%r10), %rax
  21393. movq %r8, 304(%r10)
  21394. sbbq 312(%r11), %rax
  21395. movq 320(%r10), %rcx
  21396. movq %rax, 312(%r10)
  21397. sbbq 320(%r11), %rcx
  21398. movq 328(%r10), %r8
  21399. movq %rcx, 320(%r10)
  21400. sbbq 328(%r11), %r8
  21401. movq 336(%r10), %rax
  21402. movq %r8, 328(%r10)
  21403. sbbq 336(%r11), %rax
  21404. movq 344(%r10), %rcx
  21405. movq %rax, 336(%r10)
  21406. sbbq 344(%r11), %rcx
  21407. movq 352(%r10), %r8
  21408. movq %rcx, 344(%r10)
  21409. sbbq 352(%r11), %r8
  21410. movq 360(%r10), %rax
  21411. movq %r8, 352(%r10)
  21412. sbbq 360(%r11), %rax
  21413. movq 368(%r10), %rcx
  21414. movq %rax, 360(%r10)
  21415. sbbq 368(%r11), %rcx
  21416. movq 376(%r10), %r8
  21417. movq %rcx, 368(%r10)
  21418. sbbq 376(%r11), %r8
  21419. movq %r8, 376(%r10)
  21420. sbbq $0x00, %r9
  21421. movq (%r10), %rax
  21422. subq (%rdi), %rax
  21423. movq 8(%r10), %rcx
  21424. movq %rax, (%r10)
  21425. sbbq 8(%rdi), %rcx
  21426. movq 16(%r10), %r8
  21427. movq %rcx, 8(%r10)
  21428. sbbq 16(%rdi), %r8
  21429. movq 24(%r10), %rax
  21430. movq %r8, 16(%r10)
  21431. sbbq 24(%rdi), %rax
  21432. movq 32(%r10), %rcx
  21433. movq %rax, 24(%r10)
  21434. sbbq 32(%rdi), %rcx
  21435. movq 40(%r10), %r8
  21436. movq %rcx, 32(%r10)
  21437. sbbq 40(%rdi), %r8
  21438. movq 48(%r10), %rax
  21439. movq %r8, 40(%r10)
  21440. sbbq 48(%rdi), %rax
  21441. movq 56(%r10), %rcx
  21442. movq %rax, 48(%r10)
  21443. sbbq 56(%rdi), %rcx
  21444. movq 64(%r10), %r8
  21445. movq %rcx, 56(%r10)
  21446. sbbq 64(%rdi), %r8
  21447. movq 72(%r10), %rax
  21448. movq %r8, 64(%r10)
  21449. sbbq 72(%rdi), %rax
  21450. movq 80(%r10), %rcx
  21451. movq %rax, 72(%r10)
  21452. sbbq 80(%rdi), %rcx
  21453. movq 88(%r10), %r8
  21454. movq %rcx, 80(%r10)
  21455. sbbq 88(%rdi), %r8
  21456. movq 96(%r10), %rax
  21457. movq %r8, 88(%r10)
  21458. sbbq 96(%rdi), %rax
  21459. movq 104(%r10), %rcx
  21460. movq %rax, 96(%r10)
  21461. sbbq 104(%rdi), %rcx
  21462. movq 112(%r10), %r8
  21463. movq %rcx, 104(%r10)
  21464. sbbq 112(%rdi), %r8
  21465. movq 120(%r10), %rax
  21466. movq %r8, 112(%r10)
  21467. sbbq 120(%rdi), %rax
  21468. movq 128(%r10), %rcx
  21469. movq %rax, 120(%r10)
  21470. sbbq 128(%rdi), %rcx
  21471. movq 136(%r10), %r8
  21472. movq %rcx, 128(%r10)
  21473. sbbq 136(%rdi), %r8
  21474. movq 144(%r10), %rax
  21475. movq %r8, 136(%r10)
  21476. sbbq 144(%rdi), %rax
  21477. movq 152(%r10), %rcx
  21478. movq %rax, 144(%r10)
  21479. sbbq 152(%rdi), %rcx
  21480. movq 160(%r10), %r8
  21481. movq %rcx, 152(%r10)
  21482. sbbq 160(%rdi), %r8
  21483. movq 168(%r10), %rax
  21484. movq %r8, 160(%r10)
  21485. sbbq 168(%rdi), %rax
  21486. movq 176(%r10), %rcx
  21487. movq %rax, 168(%r10)
  21488. sbbq 176(%rdi), %rcx
  21489. movq 184(%r10), %r8
  21490. movq %rcx, 176(%r10)
  21491. sbbq 184(%rdi), %r8
  21492. movq 192(%r10), %rax
  21493. movq %r8, 184(%r10)
  21494. sbbq 192(%rdi), %rax
  21495. movq 200(%r10), %rcx
  21496. movq %rax, 192(%r10)
  21497. sbbq 200(%rdi), %rcx
  21498. movq 208(%r10), %r8
  21499. movq %rcx, 200(%r10)
  21500. sbbq 208(%rdi), %r8
  21501. movq 216(%r10), %rax
  21502. movq %r8, 208(%r10)
  21503. sbbq 216(%rdi), %rax
  21504. movq 224(%r10), %rcx
  21505. movq %rax, 216(%r10)
  21506. sbbq 224(%rdi), %rcx
  21507. movq 232(%r10), %r8
  21508. movq %rcx, 224(%r10)
  21509. sbbq 232(%rdi), %r8
  21510. movq 240(%r10), %rax
  21511. movq %r8, 232(%r10)
  21512. sbbq 240(%rdi), %rax
  21513. movq 248(%r10), %rcx
  21514. movq %rax, 240(%r10)
  21515. sbbq 248(%rdi), %rcx
  21516. movq 256(%r10), %r8
  21517. movq %rcx, 248(%r10)
  21518. sbbq 256(%rdi), %r8
  21519. movq 264(%r10), %rax
  21520. movq %r8, 256(%r10)
  21521. sbbq 264(%rdi), %rax
  21522. movq 272(%r10), %rcx
  21523. movq %rax, 264(%r10)
  21524. sbbq 272(%rdi), %rcx
  21525. movq 280(%r10), %r8
  21526. movq %rcx, 272(%r10)
  21527. sbbq 280(%rdi), %r8
  21528. movq 288(%r10), %rax
  21529. movq %r8, 280(%r10)
  21530. sbbq 288(%rdi), %rax
  21531. movq 296(%r10), %rcx
  21532. movq %rax, 288(%r10)
  21533. sbbq 296(%rdi), %rcx
  21534. movq 304(%r10), %r8
  21535. movq %rcx, 296(%r10)
  21536. sbbq 304(%rdi), %r8
  21537. movq 312(%r10), %rax
  21538. movq %r8, 304(%r10)
  21539. sbbq 312(%rdi), %rax
  21540. movq 320(%r10), %rcx
  21541. movq %rax, 312(%r10)
  21542. sbbq 320(%rdi), %rcx
  21543. movq 328(%r10), %r8
  21544. movq %rcx, 320(%r10)
  21545. sbbq 328(%rdi), %r8
  21546. movq 336(%r10), %rax
  21547. movq %r8, 328(%r10)
  21548. sbbq 336(%rdi), %rax
  21549. movq 344(%r10), %rcx
  21550. movq %rax, 336(%r10)
  21551. sbbq 344(%rdi), %rcx
  21552. movq 352(%r10), %r8
  21553. movq %rcx, 344(%r10)
  21554. sbbq 352(%rdi), %r8
  21555. movq 360(%r10), %rax
  21556. movq %r8, 352(%r10)
  21557. sbbq 360(%rdi), %rax
  21558. movq 368(%r10), %rcx
  21559. movq %rax, 360(%r10)
  21560. sbbq 368(%rdi), %rcx
  21561. movq 376(%r10), %r8
  21562. movq %rcx, 368(%r10)
  21563. sbbq 376(%rdi), %r8
  21564. movq %r8, 376(%r10)
  21565. sbbq $0x00, %r9
  21566. subq $0xc0, %r15
  21567. # Add
  21568. movq (%r15), %rax
  21569. addq (%r10), %rax
  21570. movq 8(%r15), %rcx
  21571. movq %rax, (%r15)
  21572. adcq 8(%r10), %rcx
  21573. movq 16(%r15), %r8
  21574. movq %rcx, 8(%r15)
  21575. adcq 16(%r10), %r8
  21576. movq 24(%r15), %rax
  21577. movq %r8, 16(%r15)
  21578. adcq 24(%r10), %rax
  21579. movq 32(%r15), %rcx
  21580. movq %rax, 24(%r15)
  21581. adcq 32(%r10), %rcx
  21582. movq 40(%r15), %r8
  21583. movq %rcx, 32(%r15)
  21584. adcq 40(%r10), %r8
  21585. movq 48(%r15), %rax
  21586. movq %r8, 40(%r15)
  21587. adcq 48(%r10), %rax
  21588. movq 56(%r15), %rcx
  21589. movq %rax, 48(%r15)
  21590. adcq 56(%r10), %rcx
  21591. movq 64(%r15), %r8
  21592. movq %rcx, 56(%r15)
  21593. adcq 64(%r10), %r8
  21594. movq 72(%r15), %rax
  21595. movq %r8, 64(%r15)
  21596. adcq 72(%r10), %rax
  21597. movq 80(%r15), %rcx
  21598. movq %rax, 72(%r15)
  21599. adcq 80(%r10), %rcx
  21600. movq 88(%r15), %r8
  21601. movq %rcx, 80(%r15)
  21602. adcq 88(%r10), %r8
  21603. movq 96(%r15), %rax
  21604. movq %r8, 88(%r15)
  21605. adcq 96(%r10), %rax
  21606. movq 104(%r15), %rcx
  21607. movq %rax, 96(%r15)
  21608. adcq 104(%r10), %rcx
  21609. movq 112(%r15), %r8
  21610. movq %rcx, 104(%r15)
  21611. adcq 112(%r10), %r8
  21612. movq 120(%r15), %rax
  21613. movq %r8, 112(%r15)
  21614. adcq 120(%r10), %rax
  21615. movq 128(%r15), %rcx
  21616. movq %rax, 120(%r15)
  21617. adcq 128(%r10), %rcx
  21618. movq 136(%r15), %r8
  21619. movq %rcx, 128(%r15)
  21620. adcq 136(%r10), %r8
  21621. movq 144(%r15), %rax
  21622. movq %r8, 136(%r15)
  21623. adcq 144(%r10), %rax
  21624. movq 152(%r15), %rcx
  21625. movq %rax, 144(%r15)
  21626. adcq 152(%r10), %rcx
  21627. movq 160(%r15), %r8
  21628. movq %rcx, 152(%r15)
  21629. adcq 160(%r10), %r8
  21630. movq 168(%r15), %rax
  21631. movq %r8, 160(%r15)
  21632. adcq 168(%r10), %rax
  21633. movq 176(%r15), %rcx
  21634. movq %rax, 168(%r15)
  21635. adcq 176(%r10), %rcx
  21636. movq 184(%r15), %r8
  21637. movq %rcx, 176(%r15)
  21638. adcq 184(%r10), %r8
  21639. movq 192(%r15), %rax
  21640. movq %r8, 184(%r15)
  21641. adcq 192(%r10), %rax
  21642. movq 200(%r15), %rcx
  21643. movq %rax, 192(%r15)
  21644. adcq 200(%r10), %rcx
  21645. movq 208(%r15), %r8
  21646. movq %rcx, 200(%r15)
  21647. adcq 208(%r10), %r8
  21648. movq 216(%r15), %rax
  21649. movq %r8, 208(%r15)
  21650. adcq 216(%r10), %rax
  21651. movq 224(%r15), %rcx
  21652. movq %rax, 216(%r15)
  21653. adcq 224(%r10), %rcx
  21654. movq 232(%r15), %r8
  21655. movq %rcx, 224(%r15)
  21656. adcq 232(%r10), %r8
  21657. movq 240(%r15), %rax
  21658. movq %r8, 232(%r15)
  21659. adcq 240(%r10), %rax
  21660. movq 248(%r15), %rcx
  21661. movq %rax, 240(%r15)
  21662. adcq 248(%r10), %rcx
  21663. movq 256(%r15), %r8
  21664. movq %rcx, 248(%r15)
  21665. adcq 256(%r10), %r8
  21666. movq 264(%r15), %rax
  21667. movq %r8, 256(%r15)
  21668. adcq 264(%r10), %rax
  21669. movq 272(%r15), %rcx
  21670. movq %rax, 264(%r15)
  21671. adcq 272(%r10), %rcx
  21672. movq 280(%r15), %r8
  21673. movq %rcx, 272(%r15)
  21674. adcq 280(%r10), %r8
  21675. movq 288(%r15), %rax
  21676. movq %r8, 280(%r15)
  21677. adcq 288(%r10), %rax
  21678. movq 296(%r15), %rcx
  21679. movq %rax, 288(%r15)
  21680. adcq 296(%r10), %rcx
  21681. movq 304(%r15), %r8
  21682. movq %rcx, 296(%r15)
  21683. adcq 304(%r10), %r8
  21684. movq 312(%r15), %rax
  21685. movq %r8, 304(%r15)
  21686. adcq 312(%r10), %rax
  21687. movq 320(%r15), %rcx
  21688. movq %rax, 312(%r15)
  21689. adcq 320(%r10), %rcx
  21690. movq 328(%r15), %r8
  21691. movq %rcx, 320(%r15)
  21692. adcq 328(%r10), %r8
  21693. movq 336(%r15), %rax
  21694. movq %r8, 328(%r15)
  21695. adcq 336(%r10), %rax
  21696. movq 344(%r15), %rcx
  21697. movq %rax, 336(%r15)
  21698. adcq 344(%r10), %rcx
  21699. movq 352(%r15), %r8
  21700. movq %rcx, 344(%r15)
  21701. adcq 352(%r10), %r8
  21702. movq 360(%r15), %rax
  21703. movq %r8, 352(%r15)
  21704. adcq 360(%r10), %rax
  21705. movq 368(%r15), %rcx
  21706. movq %rax, 360(%r15)
  21707. adcq 368(%r10), %rcx
  21708. movq 376(%r15), %r8
  21709. movq %rcx, 368(%r15)
  21710. adcq 376(%r10), %r8
  21711. movq %r8, 376(%r15)
  21712. adcq $0x00, %r9
  21713. movq %r9, 576(%rdi)
  21714. addq $0xc0, %r15
  21715. # Add
  21716. movq (%r15), %rax
  21717. addq (%r11), %rax
  21718. movq 8(%r15), %rcx
  21719. movq %rax, (%r15)
  21720. adcq 8(%r11), %rcx
  21721. movq 16(%r15), %r8
  21722. movq %rcx, 8(%r15)
  21723. adcq 16(%r11), %r8
  21724. movq 24(%r15), %rax
  21725. movq %r8, 16(%r15)
  21726. adcq 24(%r11), %rax
  21727. movq 32(%r15), %rcx
  21728. movq %rax, 24(%r15)
  21729. adcq 32(%r11), %rcx
  21730. movq 40(%r15), %r8
  21731. movq %rcx, 32(%r15)
  21732. adcq 40(%r11), %r8
  21733. movq 48(%r15), %rax
  21734. movq %r8, 40(%r15)
  21735. adcq 48(%r11), %rax
  21736. movq 56(%r15), %rcx
  21737. movq %rax, 48(%r15)
  21738. adcq 56(%r11), %rcx
  21739. movq 64(%r15), %r8
  21740. movq %rcx, 56(%r15)
  21741. adcq 64(%r11), %r8
  21742. movq 72(%r15), %rax
  21743. movq %r8, 64(%r15)
  21744. adcq 72(%r11), %rax
  21745. movq 80(%r15), %rcx
  21746. movq %rax, 72(%r15)
  21747. adcq 80(%r11), %rcx
  21748. movq 88(%r15), %r8
  21749. movq %rcx, 80(%r15)
  21750. adcq 88(%r11), %r8
  21751. movq 96(%r15), %rax
  21752. movq %r8, 88(%r15)
  21753. adcq 96(%r11), %rax
  21754. movq 104(%r15), %rcx
  21755. movq %rax, 96(%r15)
  21756. adcq 104(%r11), %rcx
  21757. movq 112(%r15), %r8
  21758. movq %rcx, 104(%r15)
  21759. adcq 112(%r11), %r8
  21760. movq 120(%r15), %rax
  21761. movq %r8, 112(%r15)
  21762. adcq 120(%r11), %rax
  21763. movq 128(%r15), %rcx
  21764. movq %rax, 120(%r15)
  21765. adcq 128(%r11), %rcx
  21766. movq 136(%r15), %r8
  21767. movq %rcx, 128(%r15)
  21768. adcq 136(%r11), %r8
  21769. movq 144(%r15), %rax
  21770. movq %r8, 136(%r15)
  21771. adcq 144(%r11), %rax
  21772. movq 152(%r15), %rcx
  21773. movq %rax, 144(%r15)
  21774. adcq 152(%r11), %rcx
  21775. movq 160(%r15), %r8
  21776. movq %rcx, 152(%r15)
  21777. adcq 160(%r11), %r8
  21778. movq 168(%r15), %rax
  21779. movq %r8, 160(%r15)
  21780. adcq 168(%r11), %rax
  21781. movq 176(%r15), %rcx
  21782. movq %rax, 168(%r15)
  21783. adcq 176(%r11), %rcx
  21784. movq 184(%r15), %r8
  21785. movq %rcx, 176(%r15)
  21786. adcq 184(%r11), %r8
  21787. movq 192(%r15), %rax
  21788. movq %r8, 184(%r15)
  21789. adcq 192(%r11), %rax
  21790. movq %rax, 192(%r15)
  21791. # Add to zero
  21792. movq 200(%r11), %rax
  21793. adcq $0x00, %rax
  21794. movq 208(%r11), %rcx
  21795. movq %rax, 200(%r15)
  21796. adcq $0x00, %rcx
  21797. movq 216(%r11), %r8
  21798. movq %rcx, 208(%r15)
  21799. adcq $0x00, %r8
  21800. movq 224(%r11), %rax
  21801. movq %r8, 216(%r15)
  21802. adcq $0x00, %rax
  21803. movq 232(%r11), %rcx
  21804. movq %rax, 224(%r15)
  21805. adcq $0x00, %rcx
  21806. movq 240(%r11), %r8
  21807. movq %rcx, 232(%r15)
  21808. adcq $0x00, %r8
  21809. movq 248(%r11), %rax
  21810. movq %r8, 240(%r15)
  21811. adcq $0x00, %rax
  21812. movq 256(%r11), %rcx
  21813. movq %rax, 248(%r15)
  21814. adcq $0x00, %rcx
  21815. movq 264(%r11), %r8
  21816. movq %rcx, 256(%r15)
  21817. adcq $0x00, %r8
  21818. movq 272(%r11), %rax
  21819. movq %r8, 264(%r15)
  21820. adcq $0x00, %rax
  21821. movq 280(%r11), %rcx
  21822. movq %rax, 272(%r15)
  21823. adcq $0x00, %rcx
  21824. movq 288(%r11), %r8
  21825. movq %rcx, 280(%r15)
  21826. adcq $0x00, %r8
  21827. movq 296(%r11), %rax
  21828. movq %r8, 288(%r15)
  21829. adcq $0x00, %rax
  21830. movq 304(%r11), %rcx
  21831. movq %rax, 296(%r15)
  21832. adcq $0x00, %rcx
  21833. movq 312(%r11), %r8
  21834. movq %rcx, 304(%r15)
  21835. adcq $0x00, %r8
  21836. movq 320(%r11), %rax
  21837. movq %r8, 312(%r15)
  21838. adcq $0x00, %rax
  21839. movq 328(%r11), %rcx
  21840. movq %rax, 320(%r15)
  21841. adcq $0x00, %rcx
  21842. movq 336(%r11), %r8
  21843. movq %rcx, 328(%r15)
  21844. adcq $0x00, %r8
  21845. movq 344(%r11), %rax
  21846. movq %r8, 336(%r15)
  21847. adcq $0x00, %rax
  21848. movq 352(%r11), %rcx
  21849. movq %rax, 344(%r15)
  21850. adcq $0x00, %rcx
  21851. movq 360(%r11), %r8
  21852. movq %rcx, 352(%r15)
  21853. adcq $0x00, %r8
  21854. movq 368(%r11), %rax
  21855. movq %r8, 360(%r15)
  21856. adcq $0x00, %rax
  21857. movq 376(%r11), %rcx
  21858. movq %rax, 368(%r15)
  21859. adcq $0x00, %rcx
  21860. movq %rcx, 376(%r15)
  21861. addq $0x4a8, %rsp
  21862. popq %r15
  21863. popq %r14
  21864. popq %r13
  21865. popq %r12
  21866. repz retq
  21867. #ifndef __APPLE__
  21868. .size sp_3072_mul_avx2_48,.-sp_3072_mul_avx2_48
  21869. #endif /* __APPLE__ */
  /* Square a and put result in r. (r = a * a)
   *
   * r  A single precision integer (written here as 96 64-bit words).
   * a  A single precision integer (read here as 48 64-bit words).
   *
   * Arguments arrive in %rdi (r) and %rsi (a).
   *
   * Karatsuba-style split of a into two 24-word halves, a = a1 * 2^1536 + a0:
   *   a^2 = a1^2 * 2^3072 + ((a0 + a1)^2 - a0^2 - a1^2) * 2^1536 + a0^2
   * With a0 + a1 = c * 2^1536 + s (c is the carry, 0 or 1; s is 24 words):
   *   (a0 + a1)^2 = s^2 + 2 * c * s * 2^1536 + c * 2^3072
   *
   * The three half-size squares come from sp_3072_sqr_avx2_24, which is
   * defined elsewhere in this file; this routine assumes it writes a 48-word
   * square of a 24-word input (NOTE(review): confirm against its definition).
   *
   * Stack frame (0x3d8 bytes):
   *     0(%rsp) .. 383(%rsp)  s^2, then s^2 - a1^2 - a0^2 in place
   *   384(%rsp) .. 767(%rsp)  a1^2
   *   768(%rsp) .. 959(%rsp)  s
   *   960(%rsp)               saved r
   *   968(%rsp)               saved a
   *   976(%rsp)               c
   *
   * Registers written by this routine itself: %rax, %rcx, %rdx, %rsi, %rdi,
   * %r8, %r9, %r10. Uses pextq, which is a BMI2 instruction.
   */
  #ifndef __APPLE__
  .text
  .globl sp_3072_sqr_avx2_48
  .type sp_3072_sqr_avx2_48,@function
  .align 16
  sp_3072_sqr_avx2_48:
  #else
  .section __TEXT,__text
  .globl _sp_3072_sqr_avx2_48
  .p2align 4
  _sp_3072_sqr_avx2_48:
  #endif /* __APPLE__ */
      subq $0x3d8, %rsp
      /* Keep both arguments in the frame so they can be reloaded after calls. */
      movq %rdi, 960(%rsp)
      movq %rsi, 968(%rsp)
      /* %r8 = s buffer on the stack, %r9 = a1 (upper 24 words of a). */
      leaq 768(%rsp), %r8
      leaq 192(%rsi), %r9
      # Add
      /* s = a0 + a1 (24 words); %rcx collects the carry out, c. */
      movq (%rsi), %rdx
      xorq %rcx, %rcx
      addq (%r9), %rdx
      movq 8(%rsi), %rax
      movq %rdx, (%r8)
      adcq 8(%r9), %rax
      movq 16(%rsi), %rdx
      movq %rax, 8(%r8)
      adcq 16(%r9), %rdx
      movq 24(%rsi), %rax
      movq %rdx, 16(%r8)
      adcq 24(%r9), %rax
      movq 32(%rsi), %rdx
      movq %rax, 24(%r8)
      adcq 32(%r9), %rdx
      movq 40(%rsi), %rax
      movq %rdx, 32(%r8)
      adcq 40(%r9), %rax
      movq 48(%rsi), %rdx
      movq %rax, 40(%r8)
      adcq 48(%r9), %rdx
      movq 56(%rsi), %rax
      movq %rdx, 48(%r8)
      adcq 56(%r9), %rax
      movq 64(%rsi), %rdx
      movq %rax, 56(%r8)
      adcq 64(%r9), %rdx
      movq 72(%rsi), %rax
      movq %rdx, 64(%r8)
      adcq 72(%r9), %rax
      movq 80(%rsi), %rdx
      movq %rax, 72(%r8)
      adcq 80(%r9), %rdx
      movq 88(%rsi), %rax
      movq %rdx, 80(%r8)
      adcq 88(%r9), %rax
      movq 96(%rsi), %rdx
      movq %rax, 88(%r8)
      adcq 96(%r9), %rdx
      movq 104(%rsi), %rax
      movq %rdx, 96(%r8)
      adcq 104(%r9), %rax
      movq 112(%rsi), %rdx
      movq %rax, 104(%r8)
      adcq 112(%r9), %rdx
      movq 120(%rsi), %rax
      movq %rdx, 112(%r8)
      adcq 120(%r9), %rax
      movq 128(%rsi), %rdx
      movq %rax, 120(%r8)
      adcq 128(%r9), %rdx
      movq 136(%rsi), %rax
      movq %rdx, 128(%r8)
      adcq 136(%r9), %rax
      movq 144(%rsi), %rdx
      movq %rax, 136(%r8)
      adcq 144(%r9), %rdx
      movq 152(%rsi), %rax
      movq %rdx, 144(%r8)
      adcq 152(%r9), %rax
      movq 160(%rsi), %rdx
      movq %rax, 152(%r8)
      adcq 160(%r9), %rdx
      movq 168(%rsi), %rax
      movq %rdx, 160(%r8)
      adcq 168(%r9), %rax
      movq 176(%rsi), %rdx
      movq %rax, 168(%r8)
      adcq 176(%r9), %rdx
      movq 184(%rsi), %rax
      movq %rdx, 176(%r8)
      adcq 184(%r9), %rax
      movq %rax, 184(%r8)
      adcq $0x00, %rcx
      movq %rcx, 976(%rsp)
      /* First call: source is s, destination is 0(%rsp). */
      movq %r8, %rsi
      movq %rsp, %rdi
  #ifndef __APPLE__
      callq sp_3072_sqr_avx2_24@plt
  #else
      callq _sp_3072_sqr_avx2_24
  #endif /* __APPLE__ */
      /* Second call: source is a1 (a + 0xc0), destination is 384(%rsp). */
      movq 968(%rsp), %rsi
      leaq 384(%rsp), %rdi
      addq $0xc0, %rsi
  #ifndef __APPLE__
      callq sp_3072_sqr_avx2_24@plt
  #else
      callq _sp_3072_sqr_avx2_24
  #endif /* __APPLE__ */
      /* Third call: source is a0 (a), destination is the low part of r. */
      movq 968(%rsp), %rsi
      movq 960(%rsp), %rdi
  #ifndef __APPLE__
      callq sp_3072_sqr_avx2_24@plt
  #else
      callq _sp_3072_sqr_avx2_24
  #endif /* __APPLE__ */
      /* Reload r from the frame: %rdi is a caller-saved register in the
       * System V AMD64 ABI, so it is not guaranteed to survive the call. */
      movq 960(%rsp), %rdi
      /* %rcx = c, %r10 = -c (zero or all ones), %r9 = r + 0x180 (word 48). */
      movq 976(%rsp), %r10
      movq %rdi, %r9
      leaq 768(%rsp), %r8
      movq %r10, %rcx
      negq %r10
      addq $0x180, %r9
      /* r[48..71] = 2 * c * s, branch-free: pextq with mask %r10 passes each
       * word of s through when c = 1 and produces 0 when c = 0; adding the
       * word to itself with carry doubles the whole 24-word value. The carry
       * out of the doubling is added to %rcx. */
      movq (%r8), %rdx
      pextq %r10, %rdx, %rdx
      addq %rdx, %rdx
      movq 8(%r8), %rax
      movq %rdx, (%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 16(%r8), %rdx
      movq %rax, 8(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 24(%r8), %rax
      movq %rdx, 16(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 32(%r8), %rdx
      movq %rax, 24(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 40(%r8), %rax
      movq %rdx, 32(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 48(%r8), %rdx
      movq %rax, 40(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 56(%r8), %rax
      movq %rdx, 48(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 64(%r8), %rdx
      movq %rax, 56(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 72(%r8), %rax
      movq %rdx, 64(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 80(%r8), %rdx
      movq %rax, 72(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 88(%r8), %rax
      movq %rdx, 80(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 96(%r8), %rdx
      movq %rax, 88(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 104(%r8), %rax
      movq %rdx, 96(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 112(%r8), %rdx
      movq %rax, 104(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 120(%r8), %rax
      movq %rdx, 112(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 128(%r8), %rdx
      movq %rax, 120(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 136(%r8), %rax
      movq %rdx, 128(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 144(%r8), %rdx
      movq %rax, 136(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 152(%r8), %rax
      movq %rdx, 144(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 160(%r8), %rdx
      movq %rax, 152(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 168(%r8), %rax
      movq %rdx, 160(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq 176(%r8), %rdx
      movq %rax, 168(%r9)
      pextq %r10, %rdx, %rdx
      adcq %rdx, %rdx
      movq 184(%r8), %rax
      movq %rdx, 176(%r9)
      pextq %r10, %rax, %rax
      adcq %rax, %rax
      movq %rax, 184(%r9)
      adcq $0x00, %rcx
      /* Stack[0..47] -= a1^2 (at 384(%rsp)); the borrow comes out of %rcx. */
      leaq 384(%rsp), %rsi
      movq %rsp, %r8
      movq (%r8), %rdx
      subq (%rsi), %rdx
      movq 8(%r8), %rax
      movq %rdx, (%r8)
      sbbq 8(%rsi), %rax
      movq 16(%r8), %rdx
      movq %rax, 8(%r8)
      sbbq 16(%rsi), %rdx
      movq 24(%r8), %rax
      movq %rdx, 16(%r8)
      sbbq 24(%rsi), %rax
      movq 32(%r8), %rdx
      movq %rax, 24(%r8)
      sbbq 32(%rsi), %rdx
      movq 40(%r8), %rax
      movq %rdx, 32(%r8)
      sbbq 40(%rsi), %rax
      movq 48(%r8), %rdx
      movq %rax, 40(%r8)
      sbbq 48(%rsi), %rdx
      movq 56(%r8), %rax
      movq %rdx, 48(%r8)
      sbbq 56(%rsi), %rax
      movq 64(%r8), %rdx
      movq %rax, 56(%r8)
      sbbq 64(%rsi), %rdx
      movq 72(%r8), %rax
      movq %rdx, 64(%r8)
      sbbq 72(%rsi), %rax
      movq 80(%r8), %rdx
      movq %rax, 72(%r8)
      sbbq 80(%rsi), %rdx
      movq 88(%r8), %rax
      movq %rdx, 80(%r8)
      sbbq 88(%rsi), %rax
      movq 96(%r8), %rdx
      movq %rax, 88(%r8)
      sbbq 96(%rsi), %rdx
      movq 104(%r8), %rax
      movq %rdx, 96(%r8)
      sbbq 104(%rsi), %rax
      movq 112(%r8), %rdx
      movq %rax, 104(%r8)
      sbbq 112(%rsi), %rdx
      movq 120(%r8), %rax
      movq %rdx, 112(%r8)
      sbbq 120(%rsi), %rax
      movq 128(%r8), %rdx
      movq %rax, 120(%r8)
      sbbq 128(%rsi), %rdx
      movq 136(%r8), %rax
      movq %rdx, 128(%r8)
      sbbq 136(%rsi), %rax
      movq 144(%r8), %rdx
      movq %rax, 136(%r8)
      sbbq 144(%rsi), %rdx
      movq 152(%r8), %rax
      movq %rdx, 144(%r8)
      sbbq 152(%rsi), %rax
      movq 160(%r8), %rdx
      movq %rax, 152(%r8)
      sbbq 160(%rsi), %rdx
      movq 168(%r8), %rax
      movq %rdx, 160(%r8)
      sbbq 168(%rsi), %rax
      movq 176(%r8), %rdx
      movq %rax, 168(%r8)
      sbbq 176(%rsi), %rdx
      movq 184(%r8), %rax
      movq %rdx, 176(%r8)
      sbbq 184(%rsi), %rax
      movq 192(%r8), %rdx
      movq %rax, 184(%r8)
      sbbq 192(%rsi), %rdx
      movq 200(%r8), %rax
      movq %rdx, 192(%r8)
      sbbq 200(%rsi), %rax
      movq 208(%r8), %rdx
      movq %rax, 200(%r8)
      sbbq 208(%rsi), %rdx
      movq 216(%r8), %rax
      movq %rdx, 208(%r8)
      sbbq 216(%rsi), %rax
      movq 224(%r8), %rdx
      movq %rax, 216(%r8)
      sbbq 224(%rsi), %rdx
      movq 232(%r8), %rax
      movq %rdx, 224(%r8)
      sbbq 232(%rsi), %rax
      movq 240(%r8), %rdx
      movq %rax, 232(%r8)
      sbbq 240(%rsi), %rdx
      movq 248(%r8), %rax
      movq %rdx, 240(%r8)
      sbbq 248(%rsi), %rax
      movq 256(%r8), %rdx
      movq %rax, 248(%r8)
      sbbq 256(%rsi), %rdx
      movq 264(%r8), %rax
      movq %rdx, 256(%r8)
      sbbq 264(%rsi), %rax
      movq 272(%r8), %rdx
      movq %rax, 264(%r8)
      sbbq 272(%rsi), %rdx
      movq 280(%r8), %rax
      movq %rdx, 272(%r8)
      sbbq 280(%rsi), %rax
      movq 288(%r8), %rdx
      movq %rax, 280(%r8)
      sbbq 288(%rsi), %rdx
      movq 296(%r8), %rax
      movq %rdx, 288(%r8)
      sbbq 296(%rsi), %rax
      movq 304(%r8), %rdx
      movq %rax, 296(%r8)
      sbbq 304(%rsi), %rdx
      movq 312(%r8), %rax
      movq %rdx, 304(%r8)
      sbbq 312(%rsi), %rax
      movq 320(%r8), %rdx
      movq %rax, 312(%r8)
      sbbq 320(%rsi), %rdx
      movq 328(%r8), %rax
      movq %rdx, 320(%r8)
      sbbq 328(%rsi), %rax
      movq 336(%r8), %rdx
      movq %rax, 328(%r8)
      sbbq 336(%rsi), %rdx
      movq 344(%r8), %rax
      movq %rdx, 336(%r8)
      sbbq 344(%rsi), %rax
      movq 352(%r8), %rdx
      movq %rax, 344(%r8)
      sbbq 352(%rsi), %rdx
      movq 360(%r8), %rax
      movq %rdx, 352(%r8)
      sbbq 360(%rsi), %rax
      movq 368(%r8), %rdx
      movq %rax, 360(%r8)
      sbbq 368(%rsi), %rdx
      movq 376(%r8), %rax
      movq %rdx, 368(%r8)
      sbbq 376(%rsi), %rax
      movq %rax, 376(%r8)
      sbbq $0x00, %rcx
      /* Stack[0..47] -= a0^2 (currently r[0..47]); borrow again from %rcx. */
      movq (%r8), %rdx
      subq (%rdi), %rdx
      movq 8(%r8), %rax
      movq %rdx, (%r8)
      sbbq 8(%rdi), %rax
      movq 16(%r8), %rdx
      movq %rax, 8(%r8)
      sbbq 16(%rdi), %rdx
      movq 24(%r8), %rax
      movq %rdx, 16(%r8)
      sbbq 24(%rdi), %rax
      movq 32(%r8), %rdx
      movq %rax, 24(%r8)
      sbbq 32(%rdi), %rdx
      movq 40(%r8), %rax
      movq %rdx, 32(%r8)
      sbbq 40(%rdi), %rax
      movq 48(%r8), %rdx
      movq %rax, 40(%r8)
      sbbq 48(%rdi), %rdx
      movq 56(%r8), %rax
      movq %rdx, 48(%r8)
      sbbq 56(%rdi), %rax
      movq 64(%r8), %rdx
      movq %rax, 56(%r8)
      sbbq 64(%rdi), %rdx
      movq 72(%r8), %rax
      movq %rdx, 64(%r8)
      sbbq 72(%rdi), %rax
      movq 80(%r8), %rdx
      movq %rax, 72(%r8)
      sbbq 80(%rdi), %rdx
      movq 88(%r8), %rax
      movq %rdx, 80(%r8)
      sbbq 88(%rdi), %rax
      movq 96(%r8), %rdx
      movq %rax, 88(%r8)
      sbbq 96(%rdi), %rdx
      movq 104(%r8), %rax
      movq %rdx, 96(%r8)
      sbbq 104(%rdi), %rax
      movq 112(%r8), %rdx
      movq %rax, 104(%r8)
      sbbq 112(%rdi), %rdx
      movq 120(%r8), %rax
      movq %rdx, 112(%r8)
      sbbq 120(%rdi), %rax
      movq 128(%r8), %rdx
      movq %rax, 120(%r8)
      sbbq 128(%rdi), %rdx
      movq 136(%r8), %rax
      movq %rdx, 128(%r8)
      sbbq 136(%rdi), %rax
      movq 144(%r8), %rdx
      movq %rax, 136(%r8)
      sbbq 144(%rdi), %rdx
      movq 152(%r8), %rax
      movq %rdx, 144(%r8)
      sbbq 152(%rdi), %rax
      movq 160(%r8), %rdx
      movq %rax, 152(%r8)
      sbbq 160(%rdi), %rdx
      movq 168(%r8), %rax
      movq %rdx, 160(%r8)
      sbbq 168(%rdi), %rax
      movq 176(%r8), %rdx
      movq %rax, 168(%r8)
      sbbq 176(%rdi), %rdx
      movq 184(%r8), %rax
      movq %rdx, 176(%r8)
      sbbq 184(%rdi), %rax
      movq 192(%r8), %rdx
      movq %rax, 184(%r8)
      sbbq 192(%rdi), %rdx
      movq 200(%r8), %rax
      movq %rdx, 192(%r8)
      sbbq 200(%rdi), %rax
      movq 208(%r8), %rdx
      movq %rax, 200(%r8)
      sbbq 208(%rdi), %rdx
      movq 216(%r8), %rax
      movq %rdx, 208(%r8)
      sbbq 216(%rdi), %rax
      movq 224(%r8), %rdx
      movq %rax, 216(%r8)
      sbbq 224(%rdi), %rdx
      movq 232(%r8), %rax
      movq %rdx, 224(%r8)
      sbbq 232(%rdi), %rax
      movq 240(%r8), %rdx
      movq %rax, 232(%r8)
      sbbq 240(%rdi), %rdx
      movq 248(%r8), %rax
      movq %rdx, 240(%r8)
      sbbq 248(%rdi), %rax
      movq 256(%r8), %rdx
      movq %rax, 248(%r8)
      sbbq 256(%rdi), %rdx
      movq 264(%r8), %rax
      movq %rdx, 256(%r8)
      sbbq 264(%rdi), %rax
      movq 272(%r8), %rdx
      movq %rax, 264(%r8)
      sbbq 272(%rdi), %rdx
      movq 280(%r8), %rax
      movq %rdx, 272(%r8)
      sbbq 280(%rdi), %rax
      movq 288(%r8), %rdx
      movq %rax, 280(%r8)
      sbbq 288(%rdi), %rdx
      movq 296(%r8), %rax
      movq %rdx, 288(%r8)
      sbbq 296(%rdi), %rax
      movq 304(%r8), %rdx
      movq %rax, 296(%r8)
      sbbq 304(%rdi), %rdx
      movq 312(%r8), %rax
      movq %rdx, 304(%r8)
      sbbq 312(%rdi), %rax
      movq 320(%r8), %rdx
      movq %rax, 312(%r8)
      sbbq 320(%rdi), %rdx
      movq 328(%r8), %rax
      movq %rdx, 320(%r8)
      sbbq 328(%rdi), %rax
      movq 336(%r8), %rdx
      movq %rax, 328(%r8)
      sbbq 336(%rdi), %rdx
      movq 344(%r8), %rax
      movq %rdx, 336(%r8)
      sbbq 344(%rdi), %rax
      movq 352(%r8), %rdx
      movq %rax, 344(%r8)
      sbbq 352(%rdi), %rdx
      movq 360(%r8), %rax
      movq %rdx, 352(%r8)
      sbbq 360(%rdi), %rax
      movq 368(%r8), %rdx
      movq %rax, 360(%r8)
      sbbq 368(%rdi), %rdx
      movq 376(%r8), %rax
      movq %rdx, 368(%r8)
      sbbq 376(%rdi), %rax
      movq %rax, 376(%r8)
      sbbq $0x00, %rcx
      /* %r9 = r + 0xc0 (word 24): where the middle term is accumulated. */
      subq $0xc0, %r9
      # Add in place
      /* r[24..71] += stack[0..47]; the carry out is added to %rcx, which is
       * then stored as r[72]. */
      movq (%r9), %rdx
      addq (%r8), %rdx
      movq 8(%r9), %rax
      movq %rdx, (%r9)
      adcq 8(%r8), %rax
      movq 16(%r9), %rdx
      movq %rax, 8(%r9)
      adcq 16(%r8), %rdx
      movq 24(%r9), %rax
      movq %rdx, 16(%r9)
      adcq 24(%r8), %rax
      movq 32(%r9), %rdx
      movq %rax, 24(%r9)
      adcq 32(%r8), %rdx
      movq 40(%r9), %rax
      movq %rdx, 32(%r9)
      adcq 40(%r8), %rax
      movq 48(%r9), %rdx
      movq %rax, 40(%r9)
      adcq 48(%r8), %rdx
      movq 56(%r9), %rax
      movq %rdx, 48(%r9)
      adcq 56(%r8), %rax
      movq 64(%r9), %rdx
      movq %rax, 56(%r9)
      adcq 64(%r8), %rdx
      movq 72(%r9), %rax
      movq %rdx, 64(%r9)
      adcq 72(%r8), %rax
      movq 80(%r9), %rdx
      movq %rax, 72(%r9)
      adcq 80(%r8), %rdx
      movq 88(%r9), %rax
      movq %rdx, 80(%r9)
      adcq 88(%r8), %rax
      movq 96(%r9), %rdx
      movq %rax, 88(%r9)
      adcq 96(%r8), %rdx
      movq 104(%r9), %rax
      movq %rdx, 96(%r9)
      adcq 104(%r8), %rax
      movq 112(%r9), %rdx
      movq %rax, 104(%r9)
      adcq 112(%r8), %rdx
      movq 120(%r9), %rax
      movq %rdx, 112(%r9)
      adcq 120(%r8), %rax
      movq 128(%r9), %rdx
      movq %rax, 120(%r9)
      adcq 128(%r8), %rdx
      movq 136(%r9), %rax
      movq %rdx, 128(%r9)
      adcq 136(%r8), %rax
      movq 144(%r9), %rdx
      movq %rax, 136(%r9)
      adcq 144(%r8), %rdx
      movq 152(%r9), %rax
      movq %rdx, 144(%r9)
      adcq 152(%r8), %rax
      movq 160(%r9), %rdx
      movq %rax, 152(%r9)
      adcq 160(%r8), %rdx
      movq 168(%r9), %rax
      movq %rdx, 160(%r9)
      adcq 168(%r8), %rax
      movq 176(%r9), %rdx
      movq %rax, 168(%r9)
      adcq 176(%r8), %rdx
      movq 184(%r9), %rax
      movq %rdx, 176(%r9)
      adcq 184(%r8), %rax
      movq 192(%r9), %rdx
      movq %rax, 184(%r9)
      adcq 192(%r8), %rdx
      movq 200(%r9), %rax
      movq %rdx, 192(%r9)
      adcq 200(%r8), %rax
      movq 208(%r9), %rdx
      movq %rax, 200(%r9)
      adcq 208(%r8), %rdx
      movq 216(%r9), %rax
      movq %rdx, 208(%r9)
      adcq 216(%r8), %rax
      movq 224(%r9), %rdx
      movq %rax, 216(%r9)
      adcq 224(%r8), %rdx
      movq 232(%r9), %rax
      movq %rdx, 224(%r9)
      adcq 232(%r8), %rax
      movq 240(%r9), %rdx
      movq %rax, 232(%r9)
      adcq 240(%r8), %rdx
      movq 248(%r9), %rax
      movq %rdx, 240(%r9)
      adcq 248(%r8), %rax
      movq 256(%r9), %rdx
      movq %rax, 248(%r9)
      adcq 256(%r8), %rdx
      movq 264(%r9), %rax
      movq %rdx, 256(%r9)
      adcq 264(%r8), %rax
      movq 272(%r9), %rdx
      movq %rax, 264(%r9)
      adcq 272(%r8), %rdx
      movq 280(%r9), %rax
      movq %rdx, 272(%r9)
      adcq 280(%r8), %rax
      movq 288(%r9), %rdx
      movq %rax, 280(%r9)
      adcq 288(%r8), %rdx
      movq 296(%r9), %rax
      movq %rdx, 288(%r9)
      adcq 296(%r8), %rax
      movq 304(%r9), %rdx
      movq %rax, 296(%r9)
      adcq 304(%r8), %rdx
      movq 312(%r9), %rax
      movq %rdx, 304(%r9)
      adcq 312(%r8), %rax
      movq 320(%r9), %rdx
      movq %rax, 312(%r9)
      adcq 320(%r8), %rdx
      movq 328(%r9), %rax
      movq %rdx, 320(%r9)
      adcq 328(%r8), %rax
      movq 336(%r9), %rdx
      movq %rax, 328(%r9)
      adcq 336(%r8), %rdx
      movq 344(%r9), %rax
      movq %rdx, 336(%r9)
      adcq 344(%r8), %rax
      movq 352(%r9), %rdx
      movq %rax, 344(%r9)
      adcq 352(%r8), %rdx
      movq 360(%r9), %rax
      movq %rdx, 352(%r9)
      adcq 360(%r8), %rax
      movq 368(%r9), %rdx
      movq %rax, 360(%r9)
      adcq 368(%r8), %rdx
      movq 376(%r9), %rax
      movq %rdx, 368(%r9)
      adcq 376(%r8), %rax
      movq %rax, 376(%r9)
      adcq $0x00, %rcx
      movq %rcx, 576(%rdi)
      # Add in place
      /* r[48..72] += low 25 words of a1^2 (%rsi still points at 384(%rsp)). */
      movq 192(%r9), %rdx
      addq (%rsi), %rdx
      movq 200(%r9), %rax
      movq %rdx, 192(%r9)
      adcq 8(%rsi), %rax
      movq 208(%r9), %rdx
      movq %rax, 200(%r9)
      adcq 16(%rsi), %rdx
      movq 216(%r9), %rax
      movq %rdx, 208(%r9)
      adcq 24(%rsi), %rax
      movq 224(%r9), %rdx
      movq %rax, 216(%r9)
      adcq 32(%rsi), %rdx
      movq 232(%r9), %rax
      movq %rdx, 224(%r9)
      adcq 40(%rsi), %rax
      movq 240(%r9), %rdx
      movq %rax, 232(%r9)
      adcq 48(%rsi), %rdx
      movq 248(%r9), %rax
      movq %rdx, 240(%r9)
      adcq 56(%rsi), %rax
      movq 256(%r9), %rdx
      movq %rax, 248(%r9)
      adcq 64(%rsi), %rdx
      movq 264(%r9), %rax
      movq %rdx, 256(%r9)
      adcq 72(%rsi), %rax
      movq 272(%r9), %rdx
      movq %rax, 264(%r9)
      adcq 80(%rsi), %rdx
      movq 280(%r9), %rax
      movq %rdx, 272(%r9)
      adcq 88(%rsi), %rax
      movq 288(%r9), %rdx
      movq %rax, 280(%r9)
      adcq 96(%rsi), %rdx
      movq 296(%r9), %rax
      movq %rdx, 288(%r9)
      adcq 104(%rsi), %rax
      movq 304(%r9), %rdx
      movq %rax, 296(%r9)
      adcq 112(%rsi), %rdx
      movq 312(%r9), %rax
      movq %rdx, 304(%r9)
      adcq 120(%rsi), %rax
      movq 320(%r9), %rdx
      movq %rax, 312(%r9)
      adcq 128(%rsi), %rdx
      movq 328(%r9), %rax
      movq %rdx, 320(%r9)
      adcq 136(%rsi), %rax
      movq 336(%r9), %rdx
      movq %rax, 328(%r9)
      adcq 144(%rsi), %rdx
      movq 344(%r9), %rax
      movq %rdx, 336(%r9)
      adcq 152(%rsi), %rax
      movq 352(%r9), %rdx
      movq %rax, 344(%r9)
      adcq 160(%rsi), %rdx
      movq 360(%r9), %rax
      movq %rdx, 352(%r9)
      adcq 168(%rsi), %rax
      movq 368(%r9), %rdx
      movq %rax, 360(%r9)
      adcq 176(%rsi), %rdx
      movq 376(%r9), %rax
      movq %rdx, 368(%r9)
      adcq 184(%rsi), %rax
      movq 384(%r9), %rdx
      movq %rax, 376(%r9)
      adcq 192(%rsi), %rdx
      movq %rdx, 384(%r9)
      # Add to zero
      /* r[73..95] = remaining 23 words of a1^2 plus the propagated carry;
       * these words of r have not been written before this point. */
      movq 200(%rsi), %rdx
      adcq $0x00, %rdx
      movq 208(%rsi), %rax
      movq %rdx, 392(%r9)
      adcq $0x00, %rax
      movq 216(%rsi), %rdx
      movq %rax, 400(%r9)
      adcq $0x00, %rdx
      movq 224(%rsi), %rax
      movq %rdx, 408(%r9)
      adcq $0x00, %rax
      movq 232(%rsi), %rdx
      movq %rax, 416(%r9)
      adcq $0x00, %rdx
      movq 240(%rsi), %rax
      movq %rdx, 424(%r9)
      adcq $0x00, %rax
      movq 248(%rsi), %rdx
      movq %rax, 432(%r9)
      adcq $0x00, %rdx
      movq 256(%rsi), %rax
      movq %rdx, 440(%r9)
      adcq $0x00, %rax
      movq 264(%rsi), %rdx
      movq %rax, 448(%r9)
      adcq $0x00, %rdx
      movq 272(%rsi), %rax
      movq %rdx, 456(%r9)
      adcq $0x00, %rax
      movq 280(%rsi), %rdx
      movq %rax, 464(%r9)
      adcq $0x00, %rdx
      movq 288(%rsi), %rax
      movq %rdx, 472(%r9)
      adcq $0x00, %rax
      movq 296(%rsi), %rdx
      movq %rax, 480(%r9)
      adcq $0x00, %rdx
      movq 304(%rsi), %rax
      movq %rdx, 488(%r9)
      adcq $0x00, %rax
      movq 312(%rsi), %rdx
      movq %rax, 496(%r9)
      adcq $0x00, %rdx
      movq 320(%rsi), %rax
      movq %rdx, 504(%r9)
      adcq $0x00, %rax
      movq 328(%rsi), %rdx
      movq %rax, 512(%r9)
      adcq $0x00, %rdx
      movq 336(%rsi), %rax
      movq %rdx, 520(%r9)
      adcq $0x00, %rax
      movq 344(%rsi), %rdx
      movq %rax, 528(%r9)
      adcq $0x00, %rdx
      movq 352(%rsi), %rax
      movq %rdx, 536(%r9)
      adcq $0x00, %rax
      movq 360(%rsi), %rdx
      movq %rax, 544(%r9)
      adcq $0x00, %rdx
      movq 368(%rsi), %rax
      movq %rdx, 552(%r9)
      adcq $0x00, %rax
      movq 376(%rsi), %rdx
      movq %rax, 560(%r9)
      adcq $0x00, %rdx
      movq %rdx, 568(%r9)
      /* Release the frame and return. */
      addq $0x3d8, %rsp
      repz retq
  #ifndef __APPLE__
  .size sp_3072_sqr_avx2_48,.-sp_3072_sqr_avx2_48
  #endif /* __APPLE__ */
  22684. /* Mul a by digit b into r. (r = a * b)
  22685. *
  22686. * r A single precision integer.
  22687. * a A single precision integer.
  22688. * b A single precision digit.
  22689. */
  22690. #ifndef __APPLE__
  22691. .text
  22692. .globl sp_3072_mul_d_48
  22693. .type sp_3072_mul_d_48,@function
  22694. .align 16
  22695. sp_3072_mul_d_48:
  22696. #else
  22697. .section __TEXT,__text
  22698. .globl _sp_3072_mul_d_48
  22699. .p2align 4
  22700. _sp_3072_mul_d_48:
  22701. #endif /* __APPLE__ */
  22702. movq %rdx, %rcx
  22703. # A[0] * B
  22704. movq %rcx, %rax
  22705. xorq %r10, %r10
  22706. mulq (%rsi)
  22707. movq %rax, %r8
  22708. movq %rdx, %r9
  22709. movq %r8, (%rdi)
  22710. # A[1] * B
  22711. movq %rcx, %rax
  22712. xorq %r8, %r8
  22713. mulq 8(%rsi)
  22714. addq %rax, %r9
  22715. movq %r9, 8(%rdi)
  22716. adcq %rdx, %r10
  22717. adcq $0x00, %r8
  22718. # A[2] * B
  22719. movq %rcx, %rax
  22720. xorq %r9, %r9
  22721. mulq 16(%rsi)
  22722. addq %rax, %r10
  22723. movq %r10, 16(%rdi)
  22724. adcq %rdx, %r8
  22725. adcq $0x00, %r9
  22726. # A[3] * B
  22727. movq %rcx, %rax
  22728. xorq %r10, %r10
  22729. mulq 24(%rsi)
  22730. addq %rax, %r8
  22731. movq %r8, 24(%rdi)
  22732. adcq %rdx, %r9
  22733. adcq $0x00, %r10
  22734. # A[4] * B
  22735. movq %rcx, %rax
  22736. xorq %r8, %r8
  22737. mulq 32(%rsi)
  22738. addq %rax, %r9
  22739. movq %r9, 32(%rdi)
  22740. adcq %rdx, %r10
  22741. adcq $0x00, %r8
  22742. # A[5] * B
  22743. movq %rcx, %rax
  22744. xorq %r9, %r9
  22745. mulq 40(%rsi)
  22746. addq %rax, %r10
  22747. movq %r10, 40(%rdi)
  22748. adcq %rdx, %r8
  22749. adcq $0x00, %r9
  22750. # A[6] * B
  22751. movq %rcx, %rax
  22752. xorq %r10, %r10
  22753. mulq 48(%rsi)
  22754. addq %rax, %r8
  22755. movq %r8, 48(%rdi)
  22756. adcq %rdx, %r9
  22757. adcq $0x00, %r10
  22758. # A[7] * B
  22759. movq %rcx, %rax
  22760. xorq %r8, %r8
  22761. mulq 56(%rsi)
  22762. addq %rax, %r9
  22763. movq %r9, 56(%rdi)
  22764. adcq %rdx, %r10
  22765. adcq $0x00, %r8
  22766. # A[8] * B
  22767. movq %rcx, %rax
  22768. xorq %r9, %r9
  22769. mulq 64(%rsi)
  22770. addq %rax, %r10
  22771. movq %r10, 64(%rdi)
  22772. adcq %rdx, %r8
  22773. adcq $0x00, %r9
  22774. # A[9] * B
  22775. movq %rcx, %rax
  22776. xorq %r10, %r10
  22777. mulq 72(%rsi)
  22778. addq %rax, %r8
  22779. movq %r8, 72(%rdi)
  22780. adcq %rdx, %r9
  22781. adcq $0x00, %r10
  22782. # A[10] * B
  22783. movq %rcx, %rax
  22784. xorq %r8, %r8
  22785. mulq 80(%rsi)
  22786. addq %rax, %r9
  22787. movq %r9, 80(%rdi)
  22788. adcq %rdx, %r10
  22789. adcq $0x00, %r8
  22790. # A[11] * B
  22791. movq %rcx, %rax
  22792. xorq %r9, %r9
  22793. mulq 88(%rsi)
  22794. addq %rax, %r10
  22795. movq %r10, 88(%rdi)
  22796. adcq %rdx, %r8
  22797. adcq $0x00, %r9
  22798. # A[12] * B
  22799. movq %rcx, %rax
  22800. xorq %r10, %r10
  22801. mulq 96(%rsi)
  22802. addq %rax, %r8
  22803. movq %r8, 96(%rdi)
  22804. adcq %rdx, %r9
  22805. adcq $0x00, %r10
  22806. # A[13] * B
  22807. movq %rcx, %rax
  22808. xorq %r8, %r8
  22809. mulq 104(%rsi)
  22810. addq %rax, %r9
  22811. movq %r9, 104(%rdi)
  22812. adcq %rdx, %r10
  22813. adcq $0x00, %r8
  22814. # A[14] * B
  22815. movq %rcx, %rax
  22816. xorq %r9, %r9
  22817. mulq 112(%rsi)
  22818. addq %rax, %r10
  22819. movq %r10, 112(%rdi)
  22820. adcq %rdx, %r8
  22821. adcq $0x00, %r9
  22822. # A[15] * B
  22823. movq %rcx, %rax
  22824. xorq %r10, %r10
  22825. mulq 120(%rsi)
  22826. addq %rax, %r8
  22827. movq %r8, 120(%rdi)
  22828. adcq %rdx, %r9
  22829. adcq $0x00, %r10
  22830. # A[16] * B
  22831. movq %rcx, %rax
  22832. xorq %r8, %r8
  22833. mulq 128(%rsi)
  22834. addq %rax, %r9
  22835. movq %r9, 128(%rdi)
  22836. adcq %rdx, %r10
  22837. adcq $0x00, %r8
  22838. # A[17] * B
  22839. movq %rcx, %rax
  22840. xorq %r9, %r9
  22841. mulq 136(%rsi)
  22842. addq %rax, %r10
  22843. movq %r10, 136(%rdi)
  22844. adcq %rdx, %r8
  22845. adcq $0x00, %r9
  22846. # A[18] * B
  22847. movq %rcx, %rax
  22848. xorq %r10, %r10
  22849. mulq 144(%rsi)
  22850. addq %rax, %r8
  22851. movq %r8, 144(%rdi)
  22852. adcq %rdx, %r9
  22853. adcq $0x00, %r10
  22854. # A[19] * B
  22855. movq %rcx, %rax
  22856. xorq %r8, %r8
  22857. mulq 152(%rsi)
  22858. addq %rax, %r9
  22859. movq %r9, 152(%rdi)
  22860. adcq %rdx, %r10
  22861. adcq $0x00, %r8
  22862. # A[20] * B
  22863. movq %rcx, %rax
  22864. xorq %r9, %r9
  22865. mulq 160(%rsi)
  22866. addq %rax, %r10
  22867. movq %r10, 160(%rdi)
  22868. adcq %rdx, %r8
  22869. adcq $0x00, %r9
  22870. # A[21] * B
  22871. movq %rcx, %rax
  22872. xorq %r10, %r10
  22873. mulq 168(%rsi)
  22874. addq %rax, %r8
  22875. movq %r8, 168(%rdi)
  22876. adcq %rdx, %r9
  22877. adcq $0x00, %r10
  22878. # A[22] * B
  22879. movq %rcx, %rax
  22880. xorq %r8, %r8
  22881. mulq 176(%rsi)
  22882. addq %rax, %r9
  22883. movq %r9, 176(%rdi)
  22884. adcq %rdx, %r10
  22885. adcq $0x00, %r8
  22886. # A[23] * B
  22887. movq %rcx, %rax
  22888. xorq %r9, %r9
  22889. mulq 184(%rsi)
  22890. addq %rax, %r10
  22891. movq %r10, 184(%rdi)
  22892. adcq %rdx, %r8
  22893. adcq $0x00, %r9
  22894. # A[24] * B
  22895. movq %rcx, %rax
  22896. xorq %r10, %r10
  22897. mulq 192(%rsi)
  22898. addq %rax, %r8
  22899. movq %r8, 192(%rdi)
  22900. adcq %rdx, %r9
  22901. adcq $0x00, %r10
  22902. # A[25] * B
  22903. movq %rcx, %rax
  22904. xorq %r8, %r8
  22905. mulq 200(%rsi)
  22906. addq %rax, %r9
  22907. movq %r9, 200(%rdi)
  22908. adcq %rdx, %r10
  22909. adcq $0x00, %r8
  22910. # A[26] * B
  22911. movq %rcx, %rax
  22912. xorq %r9, %r9
  22913. mulq 208(%rsi)
  22914. addq %rax, %r10
  22915. movq %r10, 208(%rdi)
  22916. adcq %rdx, %r8
  22917. adcq $0x00, %r9
  22918. # A[27] * B
  22919. movq %rcx, %rax
  22920. xorq %r10, %r10
  22921. mulq 216(%rsi)
  22922. addq %rax, %r8
  22923. movq %r8, 216(%rdi)
  22924. adcq %rdx, %r9
  22925. adcq $0x00, %r10
  22926. # A[28] * B
  22927. movq %rcx, %rax
  22928. xorq %r8, %r8
  22929. mulq 224(%rsi)
  22930. addq %rax, %r9
  22931. movq %r9, 224(%rdi)
  22932. adcq %rdx, %r10
  22933. adcq $0x00, %r8
  22934. # A[29] * B
  22935. movq %rcx, %rax
  22936. xorq %r9, %r9
  22937. mulq 232(%rsi)
  22938. addq %rax, %r10
  22939. movq %r10, 232(%rdi)
  22940. adcq %rdx, %r8
  22941. adcq $0x00, %r9
  22942. # A[30] * B
  22943. movq %rcx, %rax
  22944. xorq %r10, %r10
  22945. mulq 240(%rsi)
  22946. addq %rax, %r8
  22947. movq %r8, 240(%rdi)
  22948. adcq %rdx, %r9
  22949. adcq $0x00, %r10
  22950. # A[31] * B
  22951. movq %rcx, %rax
  22952. xorq %r8, %r8
  22953. mulq 248(%rsi)
  22954. addq %rax, %r9
  22955. movq %r9, 248(%rdi)
  22956. adcq %rdx, %r10
  22957. adcq $0x00, %r8
  22958. # A[32] * B
  22959. movq %rcx, %rax
  22960. xorq %r9, %r9
  22961. mulq 256(%rsi)
  22962. addq %rax, %r10
  22963. movq %r10, 256(%rdi)
  22964. adcq %rdx, %r8
  22965. adcq $0x00, %r9
  22966. # A[33] * B
  22967. movq %rcx, %rax
  22968. xorq %r10, %r10
  22969. mulq 264(%rsi)
  22970. addq %rax, %r8
  22971. movq %r8, 264(%rdi)
  22972. adcq %rdx, %r9
  22973. adcq $0x00, %r10
  22974. # A[34] * B
  22975. movq %rcx, %rax
  22976. xorq %r8, %r8
  22977. mulq 272(%rsi)
  22978. addq %rax, %r9
  22979. movq %r9, 272(%rdi)
  22980. adcq %rdx, %r10
  22981. adcq $0x00, %r8
  22982. # A[35] * B
  22983. movq %rcx, %rax
  22984. xorq %r9, %r9
  22985. mulq 280(%rsi)
  22986. addq %rax, %r10
  22987. movq %r10, 280(%rdi)
  22988. adcq %rdx, %r8
  22989. adcq $0x00, %r9
  22990. # A[36] * B
  22991. movq %rcx, %rax
  22992. xorq %r10, %r10
  22993. mulq 288(%rsi)
  22994. addq %rax, %r8
  22995. movq %r8, 288(%rdi)
  22996. adcq %rdx, %r9
  22997. adcq $0x00, %r10
  22998. # A[37] * B
  22999. movq %rcx, %rax
  23000. xorq %r8, %r8
  23001. mulq 296(%rsi)
  23002. addq %rax, %r9
  23003. movq %r9, 296(%rdi)
  23004. adcq %rdx, %r10
  23005. adcq $0x00, %r8
  23006. # A[38] * B
  23007. movq %rcx, %rax
  23008. xorq %r9, %r9
  23009. mulq 304(%rsi)
  23010. addq %rax, %r10
  23011. movq %r10, 304(%rdi)
  23012. adcq %rdx, %r8
  23013. adcq $0x00, %r9
  23014. # A[39] * B
  23015. movq %rcx, %rax
  23016. xorq %r10, %r10
  23017. mulq 312(%rsi)
  23018. addq %rax, %r8
  23019. movq %r8, 312(%rdi)
  23020. adcq %rdx, %r9
  23021. adcq $0x00, %r10
  23022. # A[40] * B
  23023. movq %rcx, %rax
  23024. xorq %r8, %r8
  23025. mulq 320(%rsi)
  23026. addq %rax, %r9
  23027. movq %r9, 320(%rdi)
  23028. adcq %rdx, %r10
  23029. adcq $0x00, %r8
  23030. # A[41] * B
  23031. movq %rcx, %rax
  23032. xorq %r9, %r9
  23033. mulq 328(%rsi)
  23034. addq %rax, %r10
  23035. movq %r10, 328(%rdi)
  23036. adcq %rdx, %r8
  23037. adcq $0x00, %r9
  23038. # A[42] * B
  23039. movq %rcx, %rax
  23040. xorq %r10, %r10
  23041. mulq 336(%rsi)
  23042. addq %rax, %r8
  23043. movq %r8, 336(%rdi)
  23044. adcq %rdx, %r9
  23045. adcq $0x00, %r10
  23046. # A[43] * B
  23047. movq %rcx, %rax
  23048. xorq %r8, %r8
  23049. mulq 344(%rsi)
  23050. addq %rax, %r9
  23051. movq %r9, 344(%rdi)
  23052. adcq %rdx, %r10
  23053. adcq $0x00, %r8
  23054. # A[44] * B
  23055. movq %rcx, %rax
  23056. xorq %r9, %r9
  23057. mulq 352(%rsi)
  23058. addq %rax, %r10
  23059. movq %r10, 352(%rdi)
  23060. adcq %rdx, %r8
  23061. adcq $0x00, %r9
  23062. # A[45] * B
  23063. movq %rcx, %rax
  23064. xorq %r10, %r10
  23065. mulq 360(%rsi)
  23066. addq %rax, %r8
  23067. movq %r8, 360(%rdi)
  23068. adcq %rdx, %r9
  23069. adcq $0x00, %r10
  23070. # A[46] * B
  23071. movq %rcx, %rax
  23072. xorq %r8, %r8
  23073. mulq 368(%rsi)
  23074. addq %rax, %r9
  23075. movq %r9, 368(%rdi)
  23076. adcq %rdx, %r10
  23077. adcq $0x00, %r8
  23078. # A[47] * B
  23079. movq %rcx, %rax
  23080. mulq 376(%rsi)
  23081. addq %rax, %r10
  23082. adcq %rdx, %r8
  23083. movq %r10, 376(%rdi)
  23084. movq %r8, 384(%rdi)
  23085. repz retq
  23086. #ifndef __APPLE__
  23087. .size sp_3072_mul_d_48,.-sp_3072_mul_d_48
  23088. #endif /* __APPLE__ */
  23089. /* Conditionally subtract b from a using the mask m.
  23090. * m is -1 to subtract and 0 when not subtracting.
  23091. *
  23092. * r A single precision number representing condition subtract result.
  23093. * a A single precision number to subtract from.
  23094. * b A single precision number to subtract.
  23095. * m Mask value to apply.
         *
         * Register use in the code below: r = %rdi, a = %rsi, b = %rdx, m = %rcx.
         * All 24 64-bit words (offsets 0..184) of a and b are read and 24 words
         * of r are written. Each word of b is ANDed with m into a 192-byte stack
         * buffer first, then r = a - (b & m) is computed with one borrow chain.
         * On return %rax is 0 when the subtraction produced no borrow and -1
         * when it did. Besides %rax, the registers %r8, %r9, %rdx and the flags
         * are overwritten.
  23096. */
  23097. #ifndef __APPLE__
  23098. .text
  23099. .globl sp_3072_cond_sub_24
  23100. .type sp_3072_cond_sub_24,@function
  23101. .align 16
  23102. sp_3072_cond_sub_24:
  23103. #else
  23104. .section __TEXT,__text
  23105. .globl _sp_3072_cond_sub_24
  23106. .p2align 4
  23107. _sp_3072_cond_sub_24:
  23108. #endif /* __APPLE__ */
         # Reserve 0xc0 (192) bytes of stack for the masked copy of b
  23109. subq $0xc0, %rsp
         # Clear the return value; the final borrow is subtracted from it below
  23110. movq $0x00, %rax
         # Mask b two words at a time: stack[i] = b[i] & m
  23111. movq (%rdx), %r8
  23112. movq 8(%rdx), %r9
  23113. andq %rcx, %r8
  23114. andq %rcx, %r9
  23115. movq %r8, (%rsp)
  23116. movq %r9, 8(%rsp)
  23117. movq 16(%rdx), %r8
  23118. movq 24(%rdx), %r9
  23119. andq %rcx, %r8
  23120. andq %rcx, %r9
  23121. movq %r8, 16(%rsp)
  23122. movq %r9, 24(%rsp)
  23123. movq 32(%rdx), %r8
  23124. movq 40(%rdx), %r9
  23125. andq %rcx, %r8
  23126. andq %rcx, %r9
  23127. movq %r8, 32(%rsp)
  23128. movq %r9, 40(%rsp)
  23129. movq 48(%rdx), %r8
  23130. movq 56(%rdx), %r9
  23131. andq %rcx, %r8
  23132. andq %rcx, %r9
  23133. movq %r8, 48(%rsp)
  23134. movq %r9, 56(%rsp)
  23135. movq 64(%rdx), %r8
  23136. movq 72(%rdx), %r9
  23137. andq %rcx, %r8
  23138. andq %rcx, %r9
  23139. movq %r8, 64(%rsp)
  23140. movq %r9, 72(%rsp)
  23141. movq 80(%rdx), %r8
  23142. movq 88(%rdx), %r9
  23143. andq %rcx, %r8
  23144. andq %rcx, %r9
  23145. movq %r8, 80(%rsp)
  23146. movq %r9, 88(%rsp)
  23147. movq 96(%rdx), %r8
  23148. movq 104(%rdx), %r9
  23149. andq %rcx, %r8
  23150. andq %rcx, %r9
  23151. movq %r8, 96(%rsp)
  23152. movq %r9, 104(%rsp)
  23153. movq 112(%rdx), %r8
  23154. movq 120(%rdx), %r9
  23155. andq %rcx, %r8
  23156. andq %rcx, %r9
  23157. movq %r8, 112(%rsp)
  23158. movq %r9, 120(%rsp)
  23159. movq 128(%rdx), %r8
  23160. movq 136(%rdx), %r9
  23161. andq %rcx, %r8
  23162. andq %rcx, %r9
  23163. movq %r8, 128(%rsp)
  23164. movq %r9, 136(%rsp)
  23165. movq 144(%rdx), %r8
  23166. movq 152(%rdx), %r9
  23167. andq %rcx, %r8
  23168. andq %rcx, %r9
  23169. movq %r8, 144(%rsp)
  23170. movq %r9, 152(%rsp)
  23171. movq 160(%rdx), %r8
  23172. movq 168(%rdx), %r9
  23173. andq %rcx, %r8
  23174. andq %rcx, %r9
  23175. movq %r8, 160(%rsp)
  23176. movq %r9, 168(%rsp)
  23177. movq 176(%rdx), %r8
  23178. movq 184(%rdx), %r9
  23179. andq %rcx, %r8
  23180. andq %rcx, %r9
  23181. movq %r8, 176(%rsp)
  23182. movq %r9, 184(%rsp)
         # Subtract chain: r[i] = a[i] - stack[i] - borrow. The first word uses
         # subq, every later word uses sbbq; the movq instructions in between
         # leave the carry flag untouched. From here on rdx is reused as scratch
         # for the masked word, and r8 and r9 alternate as the result register.
  23183. movq (%rsi), %r8
  23184. movq (%rsp), %rdx
  23185. subq %rdx, %r8
  23186. movq 8(%rsi), %r9
  23187. movq 8(%rsp), %rdx
  23188. sbbq %rdx, %r9
  23189. movq %r8, (%rdi)
  23190. movq 16(%rsi), %r8
  23191. movq 16(%rsp), %rdx
  23192. sbbq %rdx, %r8
  23193. movq %r9, 8(%rdi)
  23194. movq 24(%rsi), %r9
  23195. movq 24(%rsp), %rdx
  23196. sbbq %rdx, %r9
  23197. movq %r8, 16(%rdi)
  23198. movq 32(%rsi), %r8
  23199. movq 32(%rsp), %rdx
  23200. sbbq %rdx, %r8
  23201. movq %r9, 24(%rdi)
  23202. movq 40(%rsi), %r9
  23203. movq 40(%rsp), %rdx
  23204. sbbq %rdx, %r9
  23205. movq %r8, 32(%rdi)
  23206. movq 48(%rsi), %r8
  23207. movq 48(%rsp), %rdx
  23208. sbbq %rdx, %r8
  23209. movq %r9, 40(%rdi)
  23210. movq 56(%rsi), %r9
  23211. movq 56(%rsp), %rdx
  23212. sbbq %rdx, %r9
  23213. movq %r8, 48(%rdi)
  23214. movq 64(%rsi), %r8
  23215. movq 64(%rsp), %rdx
  23216. sbbq %rdx, %r8
  23217. movq %r9, 56(%rdi)
  23218. movq 72(%rsi), %r9
  23219. movq 72(%rsp), %rdx
  23220. sbbq %rdx, %r9
  23221. movq %r8, 64(%rdi)
  23222. movq 80(%rsi), %r8
  23223. movq 80(%rsp), %rdx
  23224. sbbq %rdx, %r8
  23225. movq %r9, 72(%rdi)
  23226. movq 88(%rsi), %r9
  23227. movq 88(%rsp), %rdx
  23228. sbbq %rdx, %r9
  23229. movq %r8, 80(%rdi)
  23230. movq 96(%rsi), %r8
  23231. movq 96(%rsp), %rdx
  23232. sbbq %rdx, %r8
  23233. movq %r9, 88(%rdi)
  23234. movq 104(%rsi), %r9
  23235. movq 104(%rsp), %rdx
  23236. sbbq %rdx, %r9
  23237. movq %r8, 96(%rdi)
  23238. movq 112(%rsi), %r8
  23239. movq 112(%rsp), %rdx
  23240. sbbq %rdx, %r8
  23241. movq %r9, 104(%rdi)
  23242. movq 120(%rsi), %r9
  23243. movq 120(%rsp), %rdx
  23244. sbbq %rdx, %r9
  23245. movq %r8, 112(%rdi)
  23246. movq 128(%rsi), %r8
  23247. movq 128(%rsp), %rdx
  23248. sbbq %rdx, %r8
  23249. movq %r9, 120(%rdi)
  23250. movq 136(%rsi), %r9
  23251. movq 136(%rsp), %rdx
  23252. sbbq %rdx, %r9
  23253. movq %r8, 128(%rdi)
  23254. movq 144(%rsi), %r8
  23255. movq 144(%rsp), %rdx
  23256. sbbq %rdx, %r8
  23257. movq %r9, 136(%rdi)
  23258. movq 152(%rsi), %r9
  23259. movq 152(%rsp), %rdx
  23260. sbbq %rdx, %r9
  23261. movq %r8, 144(%rdi)
  23262. movq 160(%rsi), %r8
  23263. movq 160(%rsp), %rdx
  23264. sbbq %rdx, %r8
  23265. movq %r9, 152(%rdi)
  23266. movq 168(%rsi), %r9
  23267. movq 168(%rsp), %rdx
  23268. sbbq %rdx, %r9
  23269. movq %r8, 160(%rdi)
  23270. movq 176(%rsi), %r8
  23271. movq 176(%rsp), %rdx
  23272. sbbq %rdx, %r8
  23273. movq %r9, 168(%rdi)
  23274. movq 184(%rsi), %r9
  23275. movq 184(%rsp), %rdx
  23276. sbbq %rdx, %r9
  23277. movq %r8, 176(%rdi)
  23278. movq %r9, 184(%rdi)
         # Return value: rax = 0 - borrow, giving 0 or -1
  23279. sbbq $0x00, %rax
         # Release the stack buffer
  23280. addq $0xc0, %rsp
  23281. repz retq
  23282. #ifndef __APPLE__
  23283. .size sp_3072_cond_sub_24,.-sp_3072_cond_sub_24
  23284. #endif /* __APPLE__ */
  23285. /* Reduce the number back to 3072 bits using Montgomery reduction.
  23286. *
  23287. * a A single precision number to reduce in place.
  23288. * m The single precision number representing the modulus.
  23289. * mp The digit representing the negative inverse of m mod 2^n.
         *
         * Register use in the code below: a = %rdi, m = %rsi, mp = %rdx (copied to
         * %rcx because mulq overwrites %rdx). The loop runs 24 times; each pass
         * computes mu = a[i] * mp (low 64 bits) and adds m[0..23] * mu into
         * a[i..i+23], with the overflow added into a[i+24]. The words a[i] and
         * a[i+1] are held in %r13/%r14 instead of memory. After the loop the
         * upper 24 words are passed to sp_3072_cond_sub_24 together with m and a
         * mask built from the accumulated carry; that call writes its result to
         * the low 24 words of a. %r12-%r15 are saved and restored.
  23290. */
  23291. #ifndef __APPLE__
  23292. .text
  23293. .globl sp_3072_mont_reduce_24
  23294. .type sp_3072_mont_reduce_24,@function
  23295. .align 16
  23296. sp_3072_mont_reduce_24:
  23297. #else
  23298. .section __TEXT,__text
  23299. .globl _sp_3072_mont_reduce_24
  23300. .p2align 4
  23301. _sp_3072_mont_reduce_24:
  23302. #endif /* __APPLE__ */
         # Save the callee-saved registers used below
  23303. pushq %r12
  23304. pushq %r13
  23305. pushq %r14
  23306. pushq %r15
         # Keep mp in rcx; mulq writes its product to rdx:rax
  23307. movq %rdx, %rcx
         # r15 holds the carry handed from one loop pass to the next; starts at 0
  23308. xorq %r15, %r15
  23309. # i = 24
  23310. movq $24, %r8
         # Preload a[0] and a[1] into r13 and r14
  23311. movq (%rdi), %r13
  23312. movq 8(%rdi), %r14
  23313. L_mont_loop_24:
  23314. # mu = a[i] * mp
  23315. movq %r13, %r11
  23316. imulq %rcx, %r11
  23317. # a[i+0] += m[0] * mu
         # Only the carry of this sum (r10) is used afterwards; r13 is
         # overwritten in the next step and this word is not stored
  23318. movq %r11, %rax
  23319. xorq %r10, %r10
  23320. mulq (%rsi)
  23321. addq %rax, %r13
  23322. adcq %rdx, %r10
  23323. # a[i+1] += m[1] * mu
         # The sum is left in r13, which is read as a[i] on the next pass
  23324. movq %r11, %rax
  23325. xorq %r9, %r9
  23326. mulq 8(%rsi)
  23327. movq %r14, %r13
  23328. addq %rax, %r13
  23329. adcq %rdx, %r9
  23330. addq %r10, %r13
  23331. adcq $0x00, %r9
  23332. # a[i+2] += m[2] * mu
         # The sum stays in r14 (not stored here); it is read as a[i+1] on the
         # next pass
  23333. movq %r11, %rax
  23334. xorq %r10, %r10
  23335. mulq 16(%rsi)
  23336. movq 16(%rdi), %r14
  23337. addq %rax, %r14
  23338. adcq %rdx, %r10
  23339. addq %r9, %r14
  23340. adcq $0x00, %r10
  23341. # a[i+3] += m[3] * mu
         # From this word on each sum is formed in r12 and stored back to a;
         # r9 and r10 alternate as the running carry word
  23342. movq %r11, %rax
  23343. xorq %r9, %r9
  23344. mulq 24(%rsi)
  23345. movq 24(%rdi), %r12
  23346. addq %rax, %r12
  23347. adcq %rdx, %r9
  23348. addq %r10, %r12
  23349. movq %r12, 24(%rdi)
  23350. adcq $0x00, %r9
  23351. # a[i+4] += m[4] * mu
  23352. movq %r11, %rax
  23353. xorq %r10, %r10
  23354. mulq 32(%rsi)
  23355. movq 32(%rdi), %r12
  23356. addq %rax, %r12
  23357. adcq %rdx, %r10
  23358. addq %r9, %r12
  23359. movq %r12, 32(%rdi)
  23360. adcq $0x00, %r10
  23361. # a[i+5] += m[5] * mu
  23362. movq %r11, %rax
  23363. xorq %r9, %r9
  23364. mulq 40(%rsi)
  23365. movq 40(%rdi), %r12
  23366. addq %rax, %r12
  23367. adcq %rdx, %r9
  23368. addq %r10, %r12
  23369. movq %r12, 40(%rdi)
  23370. adcq $0x00, %r9
  23371. # a[i+6] += m[6] * mu
  23372. movq %r11, %rax
  23373. xorq %r10, %r10
  23374. mulq 48(%rsi)
  23375. movq 48(%rdi), %r12
  23376. addq %rax, %r12
  23377. adcq %rdx, %r10
  23378. addq %r9, %r12
  23379. movq %r12, 48(%rdi)
  23380. adcq $0x00, %r10
  23381. # a[i+7] += m[7] * mu
  23382. movq %r11, %rax
  23383. xorq %r9, %r9
  23384. mulq 56(%rsi)
  23385. movq 56(%rdi), %r12
  23386. addq %rax, %r12
  23387. adcq %rdx, %r9
  23388. addq %r10, %r12
  23389. movq %r12, 56(%rdi)
  23390. adcq $0x00, %r9
  23391. # a[i+8] += m[8] * mu
  23392. movq %r11, %rax
  23393. xorq %r10, %r10
  23394. mulq 64(%rsi)
  23395. movq 64(%rdi), %r12
  23396. addq %rax, %r12
  23397. adcq %rdx, %r10
  23398. addq %r9, %r12
  23399. movq %r12, 64(%rdi)
  23400. adcq $0x00, %r10
  23401. # a[i+9] += m[9] * mu
  23402. movq %r11, %rax
  23403. xorq %r9, %r9
  23404. mulq 72(%rsi)
  23405. movq 72(%rdi), %r12
  23406. addq %rax, %r12
  23407. adcq %rdx, %r9
  23408. addq %r10, %r12
  23409. movq %r12, 72(%rdi)
  23410. adcq $0x00, %r9
  23411. # a[i+10] += m[10] * mu
  23412. movq %r11, %rax
  23413. xorq %r10, %r10
  23414. mulq 80(%rsi)
  23415. movq 80(%rdi), %r12
  23416. addq %rax, %r12
  23417. adcq %rdx, %r10
  23418. addq %r9, %r12
  23419. movq %r12, 80(%rdi)
  23420. adcq $0x00, %r10
  23421. # a[i+11] += m[11] * mu
  23422. movq %r11, %rax
  23423. xorq %r9, %r9
  23424. mulq 88(%rsi)
  23425. movq 88(%rdi), %r12
  23426. addq %rax, %r12
  23427. adcq %rdx, %r9
  23428. addq %r10, %r12
  23429. movq %r12, 88(%rdi)
  23430. adcq $0x00, %r9
  23431. # a[i+12] += m[12] * mu
  23432. movq %r11, %rax
  23433. xorq %r10, %r10
  23434. mulq 96(%rsi)
  23435. movq 96(%rdi), %r12
  23436. addq %rax, %r12
  23437. adcq %rdx, %r10
  23438. addq %r9, %r12
  23439. movq %r12, 96(%rdi)
  23440. adcq $0x00, %r10
  23441. # a[i+13] += m[13] * mu
  23442. movq %r11, %rax
  23443. xorq %r9, %r9
  23444. mulq 104(%rsi)
  23445. movq 104(%rdi), %r12
  23446. addq %rax, %r12
  23447. adcq %rdx, %r9
  23448. addq %r10, %r12
  23449. movq %r12, 104(%rdi)
  23450. adcq $0x00, %r9
  23451. # a[i+14] += m[14] * mu
  23452. movq %r11, %rax
  23453. xorq %r10, %r10
  23454. mulq 112(%rsi)
  23455. movq 112(%rdi), %r12
  23456. addq %rax, %r12
  23457. adcq %rdx, %r10
  23458. addq %r9, %r12
  23459. movq %r12, 112(%rdi)
  23460. adcq $0x00, %r10
  23461. # a[i+15] += m[15] * mu
  23462. movq %r11, %rax
  23463. xorq %r9, %r9
  23464. mulq 120(%rsi)
  23465. movq 120(%rdi), %r12
  23466. addq %rax, %r12
  23467. adcq %rdx, %r9
  23468. addq %r10, %r12
  23469. movq %r12, 120(%rdi)
  23470. adcq $0x00, %r9
  23471. # a[i+16] += m[16] * mu
  23472. movq %r11, %rax
  23473. xorq %r10, %r10
  23474. mulq 128(%rsi)
  23475. movq 128(%rdi), %r12
  23476. addq %rax, %r12
  23477. adcq %rdx, %r10
  23478. addq %r9, %r12
  23479. movq %r12, 128(%rdi)
  23480. adcq $0x00, %r10
  23481. # a[i+17] += m[17] * mu
  23482. movq %r11, %rax
  23483. xorq %r9, %r9
  23484. mulq 136(%rsi)
  23485. movq 136(%rdi), %r12
  23486. addq %rax, %r12
  23487. adcq %rdx, %r9
  23488. addq %r10, %r12
  23489. movq %r12, 136(%rdi)
  23490. adcq $0x00, %r9
  23491. # a[i+18] += m[18] * mu
  23492. movq %r11, %rax
  23493. xorq %r10, %r10
  23494. mulq 144(%rsi)
  23495. movq 144(%rdi), %r12
  23496. addq %rax, %r12
  23497. adcq %rdx, %r10
  23498. addq %r9, %r12
  23499. movq %r12, 144(%rdi)
  23500. adcq $0x00, %r10
  23501. # a[i+19] += m[19] * mu
  23502. movq %r11, %rax
  23503. xorq %r9, %r9
  23504. mulq 152(%rsi)
  23505. movq 152(%rdi), %r12
  23506. addq %rax, %r12
  23507. adcq %rdx, %r9
  23508. addq %r10, %r12
  23509. movq %r12, 152(%rdi)
  23510. adcq $0x00, %r9
  23511. # a[i+20] += m[20] * mu
  23512. movq %r11, %rax
  23513. xorq %r10, %r10
  23514. mulq 160(%rsi)
  23515. movq 160(%rdi), %r12
  23516. addq %rax, %r12
  23517. adcq %rdx, %r10
  23518. addq %r9, %r12
  23519. movq %r12, 160(%rdi)
  23520. adcq $0x00, %r10
  23521. # a[i+21] += m[21] * mu
  23522. movq %r11, %rax
  23523. xorq %r9, %r9
  23524. mulq 168(%rsi)
  23525. movq 168(%rdi), %r12
  23526. addq %rax, %r12
  23527. adcq %rdx, %r9
  23528. addq %r10, %r12
  23529. movq %r12, 168(%rdi)
  23530. adcq $0x00, %r9
  23531. # a[i+22] += m[22] * mu
  23532. movq %r11, %rax
  23533. xorq %r10, %r10
  23534. mulq 176(%rsi)
  23535. movq 176(%rdi), %r12
  23536. addq %rax, %r12
  23537. adcq %rdx, %r10
  23538. addq %r9, %r12
  23539. movq %r12, 176(%rdi)
  23540. adcq $0x00, %r10
  23541. # a[i+23] += m[23] * mu
         # The high product word also absorbs the carry left in r15 by the
         # previous pass. r15 is then rebuilt from the two carries out of this
         # step; movq zeroes it without touching the carry flag that the
         # following adcq consumes. The high word is added into a[i+24], which
         # is the word at 192(%rdi).
  23542. movq %r11, %rax
  23543. mulq 184(%rsi)
  23544. movq 184(%rdi), %r12
  23545. addq %rax, %r10
  23546. adcq %r15, %rdx
  23547. movq $0x00, %r15
  23548. adcq $0x00, %r15
  23549. addq %r10, %r12
  23550. movq %r12, 184(%rdi)
  23551. adcq %rdx, 192(%rdi)
  23552. adcq $0x00, %r15
  23553. # i -= 1
         # Advance the a pointer by one word and count the pass down
  23554. addq $8, %rdi
  23555. decq %r8
  23556. jnz L_mont_loop_24
         # After 24 passes rdi points 24 words (192 bytes) past the original a.
         # Write back the two words still held in registers.
  23557. movq %r13, (%rdi)
  23558. movq %r14, 8(%rdi)
         # Build the mask from the carry: negq maps 0 to 0 and 1 to -1
  23559. negq %r15
         # Arguments for the conditional subtract: m = mask (rcx), b = modulus
         # (rdx), a = upper 24 words (rsi), r = rdi - 0xc0, the original a (rdi)
  23560. movq %r15, %rcx
  23561. movq %rsi, %rdx
  23562. movq %rdi, %rsi
         # NOTE(review): the next move copies rdi onto itself and has no effect
  23563. movq %rdi, %rdi
  23564. subq $0xc0, %rdi
         # NOTE(review): four pushes follow the return address, so rsp is not a
         # multiple of 16 at this call when the function was entered with the
         # standard SysV alignment. The callee in this file only makes 8-byte
         # accesses - TODO confirm nothing else depends on alignment here.
  23565. #ifndef __APPLE__
  23566. callq sp_3072_cond_sub_24@plt
  23567. #else
  23568. callq _sp_3072_cond_sub_24
  23569. #endif /* __APPLE__ */
         # Restore the saved registers in reverse order
  23570. popq %r15
  23571. popq %r14
  23572. popq %r13
  23573. popq %r12
  23574. repz retq
  23575. #ifndef __APPLE__
  23576. .size sp_3072_mont_reduce_24,.-sp_3072_mont_reduce_24
  23577. #endif /* __APPLE__ */
  23578. /* Conditionally subtract b from a using the mask m.
  23579. * m is -1 to subtract and 0 when not subtracting.
  23580. *
  23581. * r A single precision number representing condition subtract result.
  23582. * a A single precision number to subtract from.
  23583. * b A single precision number to subtract.
  23584. * m Mask value to apply.
         *
         * Register use in the code below: r = %rdi, a = %rsi, b = %rdx, m = %rcx.
         * Each of the 24 words of b is passed through pextq with m as the
         * bit-selection mask (m = -1 keeps the word, m = 0 gives 0) and is then
         * subtracted from the matching word of a with a single borrow chain; no
         * stack buffer is used. On return %rax is 0 when there was no final
         * borrow and -1 when there was. Besides %rax, the registers %r8, %r9,
         * %r10 and the flags are overwritten.
  23585. */
  23586. #ifndef __APPLE__
  23587. .text
  23588. .globl sp_3072_cond_sub_avx2_24
  23589. .type sp_3072_cond_sub_avx2_24,@function
  23590. .align 16
  23591. sp_3072_cond_sub_avx2_24:
  23592. #else
  23593. .section __TEXT,__text
  23594. .globl _sp_3072_cond_sub_avx2_24
  23595. .p2align 4
  23596. _sp_3072_cond_sub_avx2_24:
  23597. #endif /* __APPLE__ */
         # Clear the return value; the final borrow is subtracted from it at the
         # end
  23598. movq $0x00, %rax
         # Word 0 opens the borrow chain with subq
  23599. movq (%rdx), %r10
  23600. movq (%rsi), %r8
  23601. pextq %rcx, %r10, %r10
  23602. subq %r10, %r8
         # Words 1..23 continue the chain with sbbq. r8, r9 and r10 rotate
         # between the roles of masked b word, a word and pending result; the
         # previous result is stored between each pextq and sbbq. Neither pextq
         # nor movq modifies the carry flag, so the borrow is carried through.
  23603. movq 8(%rdx), %r10
  23604. movq 8(%rsi), %r9
  23605. pextq %rcx, %r10, %r10
  23606. movq %r8, (%rdi)
  23607. sbbq %r10, %r9
  23608. movq 16(%rdx), %r8
  23609. movq 16(%rsi), %r10
  23610. pextq %rcx, %r8, %r8
  23611. movq %r9, 8(%rdi)
  23612. sbbq %r8, %r10
  23613. movq 24(%rdx), %r9
  23614. movq 24(%rsi), %r8
  23615. pextq %rcx, %r9, %r9
  23616. movq %r10, 16(%rdi)
  23617. sbbq %r9, %r8
  23618. movq 32(%rdx), %r10
  23619. movq 32(%rsi), %r9
  23620. pextq %rcx, %r10, %r10
  23621. movq %r8, 24(%rdi)
  23622. sbbq %r10, %r9
  23623. movq 40(%rdx), %r8
  23624. movq 40(%rsi), %r10
  23625. pextq %rcx, %r8, %r8
  23626. movq %r9, 32(%rdi)
  23627. sbbq %r8, %r10
  23628. movq 48(%rdx), %r9
  23629. movq 48(%rsi), %r8
  23630. pextq %rcx, %r9, %r9
  23631. movq %r10, 40(%rdi)
  23632. sbbq %r9, %r8
  23633. movq 56(%rdx), %r10
  23634. movq 56(%rsi), %r9
  23635. pextq %rcx, %r10, %r10
  23636. movq %r8, 48(%rdi)
  23637. sbbq %r10, %r9
  23638. movq 64(%rdx), %r8
  23639. movq 64(%rsi), %r10
  23640. pextq %rcx, %r8, %r8
  23641. movq %r9, 56(%rdi)
  23642. sbbq %r8, %r10
  23643. movq 72(%rdx), %r9
  23644. movq 72(%rsi), %r8
  23645. pextq %rcx, %r9, %r9
  23646. movq %r10, 64(%rdi)
  23647. sbbq %r9, %r8
  23648. movq 80(%rdx), %r10
  23649. movq 80(%rsi), %r9
  23650. pextq %rcx, %r10, %r10
  23651. movq %r8, 72(%rdi)
  23652. sbbq %r10, %r9
  23653. movq 88(%rdx), %r8
  23654. movq 88(%rsi), %r10
  23655. pextq %rcx, %r8, %r8
  23656. movq %r9, 80(%rdi)
  23657. sbbq %r8, %r10
  23658. movq 96(%rdx), %r9
  23659. movq 96(%rsi), %r8
  23660. pextq %rcx, %r9, %r9
  23661. movq %r10, 88(%rdi)
  23662. sbbq %r9, %r8
  23663. movq 104(%rdx), %r10
  23664. movq 104(%rsi), %r9
  23665. pextq %rcx, %r10, %r10
  23666. movq %r8, 96(%rdi)
  23667. sbbq %r10, %r9
  23668. movq 112(%rdx), %r8
  23669. movq 112(%rsi), %r10
  23670. pextq %rcx, %r8, %r8
  23671. movq %r9, 104(%rdi)
  23672. sbbq %r8, %r10
  23673. movq 120(%rdx), %r9
  23674. movq 120(%rsi), %r8
  23675. pextq %rcx, %r9, %r9
  23676. movq %r10, 112(%rdi)
  23677. sbbq %r9, %r8
  23678. movq 128(%rdx), %r10
  23679. movq 128(%rsi), %r9
  23680. pextq %rcx, %r10, %r10
  23681. movq %r8, 120(%rdi)
  23682. sbbq %r10, %r9
  23683. movq 136(%rdx), %r8
  23684. movq 136(%rsi), %r10
  23685. pextq %rcx, %r8, %r8
  23686. movq %r9, 128(%rdi)
  23687. sbbq %r8, %r10
  23688. movq 144(%rdx), %r9
  23689. movq 144(%rsi), %r8
  23690. pextq %rcx, %r9, %r9
  23691. movq %r10, 136(%rdi)
  23692. sbbq %r9, %r8
  23693. movq 152(%rdx), %r10
  23694. movq 152(%rsi), %r9
  23695. pextq %rcx, %r10, %r10
  23696. movq %r8, 144(%rdi)
  23697. sbbq %r10, %r9
  23698. movq 160(%rdx), %r8
  23699. movq 160(%rsi), %r10
  23700. pextq %rcx, %r8, %r8
  23701. movq %r9, 152(%rdi)
  23702. sbbq %r8, %r10
  23703. movq 168(%rdx), %r9
  23704. movq 168(%rsi), %r8
  23705. pextq %rcx, %r9, %r9
  23706. movq %r10, 160(%rdi)
  23707. sbbq %r9, %r8
  23708. movq 176(%rdx), %r10
  23709. movq 176(%rsi), %r9
  23710. pextq %rcx, %r10, %r10
  23711. movq %r8, 168(%rdi)
  23712. sbbq %r10, %r9
  23713. movq 184(%rdx), %r8
  23714. movq 184(%rsi), %r10
  23715. pextq %rcx, %r8, %r8
  23716. movq %r9, 176(%rdi)
  23717. sbbq %r8, %r10
  23718. movq %r10, 184(%rdi)
         # Return value: rax = 0 - borrow, giving 0 or -1
  23719. sbbq $0x00, %rax
  23720. repz retq
  23721. #ifndef __APPLE__
  23722. .size sp_3072_cond_sub_avx2_24,.-sp_3072_cond_sub_avx2_24
  23723. #endif /* __APPLE__ */
  23724. /* Mul a by digit b into r. (r = a * b)
  23725. *
  23726. * r A single precision integer.
  23727. * a A single precision integer.
  23728. * b A single precision digit.
         *
         * Register use in the code below: r = %rdi, a = %rsi, b = %rdx (copied to
         * %rcx because mulq overwrites %rdx). 24 words of a are read (offsets
         * 0..184) and 25 words of r are written (offsets 0..192). %r8, %r9 and
         * %r10 rotate as the pending result word, the word above it and the
         * carry above that. %rax, %rcx, %rdx, %r8-%r10 and the flags are
         * overwritten.
  23729. */
  23730. #ifndef __APPLE__
  23731. .text
  23732. .globl sp_3072_mul_d_24
  23733. .type sp_3072_mul_d_24,@function
  23734. .align 16
  23735. sp_3072_mul_d_24:
  23736. #else
  23737. .section __TEXT,__text
  23738. .globl _sp_3072_mul_d_24
  23739. .p2align 4
  23740. _sp_3072_mul_d_24:
  23741. #endif /* __APPLE__ */
         # Keep b in rcx; mulq writes its 128-bit product to rdx:rax
  23742. movq %rdx, %rcx
  23743. # A[0] * B
         # First product: the low word is stored to r[0] and the high word
         # becomes the pending value for r[1] in r9
  23744. movq %rcx, %rax
  23745. xorq %r10, %r10
  23746. mulq (%rsi)
  23747. movq %rax, %r8
  23748. movq %rdx, %r9
  23749. movq %r8, (%rdi)
  23750. # A[1] * B
         # Pattern repeated for every middle word: the low product word is added
         # to the pending result word, which is then stored; the high product
         # word plus carry goes into the next accumulator; any further carry is
         # caught in the register just zeroed by xorq
  23751. movq %rcx, %rax
  23752. xorq %r8, %r8
  23753. mulq 8(%rsi)
  23754. addq %rax, %r9
  23755. movq %r9, 8(%rdi)
  23756. adcq %rdx, %r10
  23757. adcq $0x00, %r8
  23758. # A[2] * B
  23759. movq %rcx, %rax
  23760. xorq %r9, %r9
  23761. mulq 16(%rsi)
  23762. addq %rax, %r10
  23763. movq %r10, 16(%rdi)
  23764. adcq %rdx, %r8
  23765. adcq $0x00, %r9
  23766. # A[3] * B
  23767. movq %rcx, %rax
  23768. xorq %r10, %r10
  23769. mulq 24(%rsi)
  23770. addq %rax, %r8
  23771. movq %r8, 24(%rdi)
  23772. adcq %rdx, %r9
  23773. adcq $0x00, %r10
  23774. # A[4] * B
  23775. movq %rcx, %rax
  23776. xorq %r8, %r8
  23777. mulq 32(%rsi)
  23778. addq %rax, %r9
  23779. movq %r9, 32(%rdi)
  23780. adcq %rdx, %r10
  23781. adcq $0x00, %r8
  23782. # A[5] * B
  23783. movq %rcx, %rax
  23784. xorq %r9, %r9
  23785. mulq 40(%rsi)
  23786. addq %rax, %r10
  23787. movq %r10, 40(%rdi)
  23788. adcq %rdx, %r8
  23789. adcq $0x00, %r9
  23790. # A[6] * B
  23791. movq %rcx, %rax
  23792. xorq %r10, %r10
  23793. mulq 48(%rsi)
  23794. addq %rax, %r8
  23795. movq %r8, 48(%rdi)
  23796. adcq %rdx, %r9
  23797. adcq $0x00, %r10
  23798. # A[7] * B
  23799. movq %rcx, %rax
  23800. xorq %r8, %r8
  23801. mulq 56(%rsi)
  23802. addq %rax, %r9
  23803. movq %r9, 56(%rdi)
  23804. adcq %rdx, %r10
  23805. adcq $0x00, %r8
  23806. # A[8] * B
  23807. movq %rcx, %rax
  23808. xorq %r9, %r9
  23809. mulq 64(%rsi)
  23810. addq %rax, %r10
  23811. movq %r10, 64(%rdi)
  23812. adcq %rdx, %r8
  23813. adcq $0x00, %r9
  23814. # A[9] * B
  23815. movq %rcx, %rax
  23816. xorq %r10, %r10
  23817. mulq 72(%rsi)
  23818. addq %rax, %r8
  23819. movq %r8, 72(%rdi)
  23820. adcq %rdx, %r9
  23821. adcq $0x00, %r10
  23822. # A[10] * B
  23823. movq %rcx, %rax
  23824. xorq %r8, %r8
  23825. mulq 80(%rsi)
  23826. addq %rax, %r9
  23827. movq %r9, 80(%rdi)
  23828. adcq %rdx, %r10
  23829. adcq $0x00, %r8
  23830. # A[11] * B
  23831. movq %rcx, %rax
  23832. xorq %r9, %r9
  23833. mulq 88(%rsi)
  23834. addq %rax, %r10
  23835. movq %r10, 88(%rdi)
  23836. adcq %rdx, %r8
  23837. adcq $0x00, %r9
  23838. # A[12] * B
  23839. movq %rcx, %rax
  23840. xorq %r10, %r10
  23841. mulq 96(%rsi)
  23842. addq %rax, %r8
  23843. movq %r8, 96(%rdi)
  23844. adcq %rdx, %r9
  23845. adcq $0x00, %r10
  23846. # A[13] * B
  23847. movq %rcx, %rax
  23848. xorq %r8, %r8
  23849. mulq 104(%rsi)
  23850. addq %rax, %r9
  23851. movq %r9, 104(%rdi)
  23852. adcq %rdx, %r10
  23853. adcq $0x00, %r8
  23854. # A[14] * B
  23855. movq %rcx, %rax
  23856. xorq %r9, %r9
  23857. mulq 112(%rsi)
  23858. addq %rax, %r10
  23859. movq %r10, 112(%rdi)
  23860. adcq %rdx, %r8
  23861. adcq $0x00, %r9
  23862. # A[15] * B
  23863. movq %rcx, %rax
  23864. xorq %r10, %r10
  23865. mulq 120(%rsi)
  23866. addq %rax, %r8
  23867. movq %r8, 120(%rdi)
  23868. adcq %rdx, %r9
  23869. adcq $0x00, %r10
  23870. # A[16] * B
  23871. movq %rcx, %rax
  23872. xorq %r8, %r8
  23873. mulq 128(%rsi)
  23874. addq %rax, %r9
  23875. movq %r9, 128(%rdi)
  23876. adcq %rdx, %r10
  23877. adcq $0x00, %r8
  23878. # A[17] * B
  23879. movq %rcx, %rax
  23880. xorq %r9, %r9
  23881. mulq 136(%rsi)
  23882. addq %rax, %r10
  23883. movq %r10, 136(%rdi)
  23884. adcq %rdx, %r8
  23885. adcq $0x00, %r9
  23886. # A[18] * B
  23887. movq %rcx, %rax
  23888. xorq %r10, %r10
  23889. mulq 144(%rsi)
  23890. addq %rax, %r8
  23891. movq %r8, 144(%rdi)
  23892. adcq %rdx, %r9
  23893. adcq $0x00, %r10
  23894. # A[19] * B
  23895. movq %rcx, %rax
  23896. xorq %r8, %r8
  23897. mulq 152(%rsi)
  23898. addq %rax, %r9
  23899. movq %r9, 152(%rdi)
  23900. adcq %rdx, %r10
  23901. adcq $0x00, %r8
  23902. # A[20] * B
  23903. movq %rcx, %rax
  23904. xorq %r9, %r9
  23905. mulq 160(%rsi)
  23906. addq %rax, %r10
  23907. movq %r10, 160(%rdi)
  23908. adcq %rdx, %r8
  23909. adcq $0x00, %r9
  23910. # A[21] * B
  23911. movq %rcx, %rax
  23912. xorq %r10, %r10
  23913. mulq 168(%rsi)
  23914. addq %rax, %r8
  23915. movq %r8, 168(%rdi)
  23916. adcq %rdx, %r9
  23917. adcq $0x00, %r10
  23918. # A[22] * B
  23919. movq %rcx, %rax
  23920. xorq %r8, %r8
  23921. mulq 176(%rsi)
  23922. addq %rax, %r9
  23923. movq %r9, 176(%rdi)
  23924. adcq %rdx, %r10
  23925. adcq $0x00, %r8
  23926. # A[23] * B
         # Last product: no new accumulator is zeroed; r[23] and the top word
         # r[24] are stored
  23927. movq %rcx, %rax
  23928. mulq 184(%rsi)
  23929. addq %rax, %r10
  23930. adcq %rdx, %r8
  23931. movq %r10, 184(%rdi)
  23932. movq %r8, 192(%rdi)
  23933. repz retq
  23934. #ifndef __APPLE__
  23935. .size sp_3072_mul_d_24,.-sp_3072_mul_d_24
  23936. #endif /* __APPLE__ */
  23937. #ifdef HAVE_INTEL_AVX2
  23938. /* Mul a by digit b into r. (r = a * b)
  23939. *
  23940. * r A single precision integer.
  23941. * a A single precision integer.
  23942. * b A single precision digit.
         *
         * As used below: r is in %rdi, a in %rsi and b in %rdx. 24 words are
         * read from a and 25 words (offsets 0..192) are written to r.
         * Registers written: %rax, %rcx, %rdx, %r8-%r11; flags are modified.
  23943. */
  23944. #ifndef __APPLE__
  23945. .text
  23946. .globl sp_3072_mul_d_avx2_24
  23947. .type sp_3072_mul_d_avx2_24,@function
  23948. .align 16
  23949. sp_3072_mul_d_avx2_24:
  23950. #else
  23951. .section __TEXT,__text
  23952. .globl _sp_3072_mul_d_avx2_24
  23953. .p2align 4
  23954. _sp_3072_mul_d_avx2_24:
  23955. #endif /* __APPLE__ */
         # The round trip through %rax leaves b in %rdx, which mulx uses as its
         # implicit multiplicand for every product below.
  23956. movq %rdx, %rax
  23957. # A[0] * B
  23958. movq %rax, %rdx
         # %r11 stays zero for the whole routine; the xor also clears CF and OF
         # so both carry chains start from zero.
  23959. xorq %r11, %r11
         # mulx places the low half of the product in %r9 and the high half in %r10.
  23960. mulxq (%rsi), %r9, %r10
  23961. movq %r9, (%rdi)
  23962. # A[1] * B
         # Pattern for each following word: low half in %rcx, high half in %r8;
         # adcx adds the low half into the running word (CF chain), the word is
         # stored, and adox adds the high half into the freshly zeroed next word
         # without touching CF. %r9 and %r10 swap roles on every step.
  23963. mulxq 8(%rsi), %rcx, %r8
  23964. movq %r11, %r9
  23965. adcxq %rcx, %r10
  23966. movq %r10, 8(%rdi)
  23967. adoxq %r8, %r9
  23968. # A[2] * B
  23969. mulxq 16(%rsi), %rcx, %r8
  23970. movq %r11, %r10
  23971. adcxq %rcx, %r9
  23972. movq %r9, 16(%rdi)
  23973. adoxq %r8, %r10
  23974. # A[3] * B
  23975. mulxq 24(%rsi), %rcx, %r8
  23976. movq %r11, %r9
  23977. adcxq %rcx, %r10
  23978. movq %r10, 24(%rdi)
  23979. adoxq %r8, %r9
  23980. # A[4] * B
  23981. mulxq 32(%rsi), %rcx, %r8
  23982. movq %r11, %r10
  23983. adcxq %rcx, %r9
  23984. movq %r9, 32(%rdi)
  23985. adoxq %r8, %r10
  23986. # A[5] * B
  23987. mulxq 40(%rsi), %rcx, %r8
  23988. movq %r11, %r9
  23989. adcxq %rcx, %r10
  23990. movq %r10, 40(%rdi)
  23991. adoxq %r8, %r9
  23992. # A[6] * B
  23993. mulxq 48(%rsi), %rcx, %r8
  23994. movq %r11, %r10
  23995. adcxq %rcx, %r9
  23996. movq %r9, 48(%rdi)
  23997. adoxq %r8, %r10
  23998. # A[7] * B
  23999. mulxq 56(%rsi), %rcx, %r8
  24000. movq %r11, %r9
  24001. adcxq %rcx, %r10
  24002. movq %r10, 56(%rdi)
  24003. adoxq %r8, %r9
  24004. # A[8] * B
  24005. mulxq 64(%rsi), %rcx, %r8
  24006. movq %r11, %r10
  24007. adcxq %rcx, %r9
  24008. movq %r9, 64(%rdi)
  24009. adoxq %r8, %r10
  24010. # A[9] * B
  24011. mulxq 72(%rsi), %rcx, %r8
  24012. movq %r11, %r9
  24013. adcxq %rcx, %r10
  24014. movq %r10, 72(%rdi)
  24015. adoxq %r8, %r9
  24016. # A[10] * B
  24017. mulxq 80(%rsi), %rcx, %r8
  24018. movq %r11, %r10
  24019. adcxq %rcx, %r9
  24020. movq %r9, 80(%rdi)
  24021. adoxq %r8, %r10
  24022. # A[11] * B
  24023. mulxq 88(%rsi), %rcx, %r8
  24024. movq %r11, %r9
  24025. adcxq %rcx, %r10
  24026. movq %r10, 88(%rdi)
  24027. adoxq %r8, %r9
  24028. # A[12] * B
  24029. mulxq 96(%rsi), %rcx, %r8
  24030. movq %r11, %r10
  24031. adcxq %rcx, %r9
  24032. movq %r9, 96(%rdi)
  24033. adoxq %r8, %r10
  24034. # A[13] * B
  24035. mulxq 104(%rsi), %rcx, %r8
  24036. movq %r11, %r9
  24037. adcxq %rcx, %r10
  24038. movq %r10, 104(%rdi)
  24039. adoxq %r8, %r9
  24040. # A[14] * B
  24041. mulxq 112(%rsi), %rcx, %r8
  24042. movq %r11, %r10
  24043. adcxq %rcx, %r9
  24044. movq %r9, 112(%rdi)
  24045. adoxq %r8, %r10
  24046. # A[15] * B
  24047. mulxq 120(%rsi), %rcx, %r8
  24048. movq %r11, %r9
  24049. adcxq %rcx, %r10
  24050. movq %r10, 120(%rdi)
  24051. adoxq %r8, %r9
  24052. # A[16] * B
  24053. mulxq 128(%rsi), %rcx, %r8
  24054. movq %r11, %r10
  24055. adcxq %rcx, %r9
  24056. movq %r9, 128(%rdi)
  24057. adoxq %r8, %r10
  24058. # A[17] * B
  24059. mulxq 136(%rsi), %rcx, %r8
  24060. movq %r11, %r9
  24061. adcxq %rcx, %r10
  24062. movq %r10, 136(%rdi)
  24063. adoxq %r8, %r9
  24064. # A[18] * B
  24065. mulxq 144(%rsi), %rcx, %r8
  24066. movq %r11, %r10
  24067. adcxq %rcx, %r9
  24068. movq %r9, 144(%rdi)
  24069. adoxq %r8, %r10
  24070. # A[19] * B
  24071. mulxq 152(%rsi), %rcx, %r8
  24072. movq %r11, %r9
  24073. adcxq %rcx, %r10
  24074. movq %r10, 152(%rdi)
  24075. adoxq %r8, %r9
  24076. # A[20] * B
  24077. mulxq 160(%rsi), %rcx, %r8
  24078. movq %r11, %r10
  24079. adcxq %rcx, %r9
  24080. movq %r9, 160(%rdi)
  24081. adoxq %r8, %r10
  24082. # A[21] * B
  24083. mulxq 168(%rsi), %rcx, %r8
  24084. movq %r11, %r9
  24085. adcxq %rcx, %r10
  24086. movq %r10, 168(%rdi)
  24087. adoxq %r8, %r9
  24088. # A[22] * B
  24089. mulxq 176(%rsi), %rcx, %r8
  24090. movq %r11, %r10
  24091. adcxq %rcx, %r9
  24092. movq %r9, 176(%rdi)
  24093. adoxq %r8, %r10
  24094. # A[23] * B
  24095. mulxq 184(%rsi), %rcx, %r8
  24096. movq %r11, %r9
  24097. adcxq %rcx, %r10
  24098. adoxq %r8, %r9
         # Fold the last CF carry into the top word (adds zero plus CF).
  24099. adcxq %r11, %r9
         # Store the final two words: r[23] and the extra top word r[24].
  24100. movq %r10, 184(%rdi)
  24101. movq %r9, 192(%rdi)
  24102. repz retq
  24103. #ifndef __APPLE__
  24104. .size sp_3072_mul_d_avx2_24,.-sp_3072_mul_d_avx2_24
  24105. #endif /* __APPLE__ */
  24106. #endif /* HAVE_INTEL_AVX2 */
  24107. /* Compare a with b in constant time.
  24108. *
  24109. * a A single precision integer.
  24110. * b A single precision integer.
  24111. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  24112. * respectively.
         *
         * As used below: a is in %rdi, b in %rsi and the result is left in %rax
         * (the code produces exactly -1, 0 or 1). All 24 words of both inputs
         * are always loaded and there are no branches; the decision is made
         * with conditional moves. Scratch: %rcx, %rdx, %r8-%r10 and the flags.
  24113. */
  24114. #ifndef __APPLE__
  24115. .text
  24116. .globl sp_3072_cmp_24
  24117. .type sp_3072_cmp_24,@function
  24118. .align 16
  24119. sp_3072_cmp_24:
  24120. #else
  24121. .section __TEXT,__text
  24122. .globl _sp_3072_cmp_24
  24123. .p2align 4
  24124. _sp_3072_cmp_24:
  24125. #endif /* __APPLE__ */
         # Constants: %rcx = 0 and %r8 = 1. %rdx is the word mask, all ones until
         # the first differing word has been seen. %rax is the result in progress.
  24126. xorq %rcx, %rcx
  24127. movq $-1, %rdx
  24128. movq $-1, %rax
  24129. movq $0x01, %r8
         # Words are compared from the most significant (offset 184) downwards.
  24130. movq 184(%rdi), %r9
  24131. movq 184(%rsi), %r10
         # Once the mask is zero both words become 0 and always compare equal.
  24132. andq %rdx, %r9
  24133. andq %rdx, %r10
  24134. subq %r10, %r9
         # Word of a above word of b (unsigned): result becomes 1.
  24135. cmova %r8, %rax
         # Word of a below word of b: result takes the mask value, which is -1
         # whenever a difference can still be observed.
  24136. cmovc %rdx, %rax
         # Words differ: clear the mask so later words cannot change the result.
  24137. cmovnz %rcx, %rdx
         # The same eight-instruction step repeats for every remaining word.
  24138. movq 176(%rdi), %r9
  24139. movq 176(%rsi), %r10
  24140. andq %rdx, %r9
  24141. andq %rdx, %r10
  24142. subq %r10, %r9
  24143. cmova %r8, %rax
  24144. cmovc %rdx, %rax
  24145. cmovnz %rcx, %rdx
  24146. movq 168(%rdi), %r9
  24147. movq 168(%rsi), %r10
  24148. andq %rdx, %r9
  24149. andq %rdx, %r10
  24150. subq %r10, %r9
  24151. cmova %r8, %rax
  24152. cmovc %rdx, %rax
  24153. cmovnz %rcx, %rdx
  24154. movq 160(%rdi), %r9
  24155. movq 160(%rsi), %r10
  24156. andq %rdx, %r9
  24157. andq %rdx, %r10
  24158. subq %r10, %r9
  24159. cmova %r8, %rax
  24160. cmovc %rdx, %rax
  24161. cmovnz %rcx, %rdx
  24162. movq 152(%rdi), %r9
  24163. movq 152(%rsi), %r10
  24164. andq %rdx, %r9
  24165. andq %rdx, %r10
  24166. subq %r10, %r9
  24167. cmova %r8, %rax
  24168. cmovc %rdx, %rax
  24169. cmovnz %rcx, %rdx
  24170. movq 144(%rdi), %r9
  24171. movq 144(%rsi), %r10
  24172. andq %rdx, %r9
  24173. andq %rdx, %r10
  24174. subq %r10, %r9
  24175. cmova %r8, %rax
  24176. cmovc %rdx, %rax
  24177. cmovnz %rcx, %rdx
  24178. movq 136(%rdi), %r9
  24179. movq 136(%rsi), %r10
  24180. andq %rdx, %r9
  24181. andq %rdx, %r10
  24182. subq %r10, %r9
  24183. cmova %r8, %rax
  24184. cmovc %rdx, %rax
  24185. cmovnz %rcx, %rdx
  24186. movq 128(%rdi), %r9
  24187. movq 128(%rsi), %r10
  24188. andq %rdx, %r9
  24189. andq %rdx, %r10
  24190. subq %r10, %r9
  24191. cmova %r8, %rax
  24192. cmovc %rdx, %rax
  24193. cmovnz %rcx, %rdx
  24194. movq 120(%rdi), %r9
  24195. movq 120(%rsi), %r10
  24196. andq %rdx, %r9
  24197. andq %rdx, %r10
  24198. subq %r10, %r9
  24199. cmova %r8, %rax
  24200. cmovc %rdx, %rax
  24201. cmovnz %rcx, %rdx
  24202. movq 112(%rdi), %r9
  24203. movq 112(%rsi), %r10
  24204. andq %rdx, %r9
  24205. andq %rdx, %r10
  24206. subq %r10, %r9
  24207. cmova %r8, %rax
  24208. cmovc %rdx, %rax
  24209. cmovnz %rcx, %rdx
  24210. movq 104(%rdi), %r9
  24211. movq 104(%rsi), %r10
  24212. andq %rdx, %r9
  24213. andq %rdx, %r10
  24214. subq %r10, %r9
  24215. cmova %r8, %rax
  24216. cmovc %rdx, %rax
  24217. cmovnz %rcx, %rdx
  24218. movq 96(%rdi), %r9
  24219. movq 96(%rsi), %r10
  24220. andq %rdx, %r9
  24221. andq %rdx, %r10
  24222. subq %r10, %r9
  24223. cmova %r8, %rax
  24224. cmovc %rdx, %rax
  24225. cmovnz %rcx, %rdx
  24226. movq 88(%rdi), %r9
  24227. movq 88(%rsi), %r10
  24228. andq %rdx, %r9
  24229. andq %rdx, %r10
  24230. subq %r10, %r9
  24231. cmova %r8, %rax
  24232. cmovc %rdx, %rax
  24233. cmovnz %rcx, %rdx
  24234. movq 80(%rdi), %r9
  24235. movq 80(%rsi), %r10
  24236. andq %rdx, %r9
  24237. andq %rdx, %r10
  24238. subq %r10, %r9
  24239. cmova %r8, %rax
  24240. cmovc %rdx, %rax
  24241. cmovnz %rcx, %rdx
  24242. movq 72(%rdi), %r9
  24243. movq 72(%rsi), %r10
  24244. andq %rdx, %r9
  24245. andq %rdx, %r10
  24246. subq %r10, %r9
  24247. cmova %r8, %rax
  24248. cmovc %rdx, %rax
  24249. cmovnz %rcx, %rdx
  24250. movq 64(%rdi), %r9
  24251. movq 64(%rsi), %r10
  24252. andq %rdx, %r9
  24253. andq %rdx, %r10
  24254. subq %r10, %r9
  24255. cmova %r8, %rax
  24256. cmovc %rdx, %rax
  24257. cmovnz %rcx, %rdx
  24258. movq 56(%rdi), %r9
  24259. movq 56(%rsi), %r10
  24260. andq %rdx, %r9
  24261. andq %rdx, %r10
  24262. subq %r10, %r9
  24263. cmova %r8, %rax
  24264. cmovc %rdx, %rax
  24265. cmovnz %rcx, %rdx
  24266. movq 48(%rdi), %r9
  24267. movq 48(%rsi), %r10
  24268. andq %rdx, %r9
  24269. andq %rdx, %r10
  24270. subq %r10, %r9
  24271. cmova %r8, %rax
  24272. cmovc %rdx, %rax
  24273. cmovnz %rcx, %rdx
  24274. movq 40(%rdi), %r9
  24275. movq 40(%rsi), %r10
  24276. andq %rdx, %r9
  24277. andq %rdx, %r10
  24278. subq %r10, %r9
  24279. cmova %r8, %rax
  24280. cmovc %rdx, %rax
  24281. cmovnz %rcx, %rdx
  24282. movq 32(%rdi), %r9
  24283. movq 32(%rsi), %r10
  24284. andq %rdx, %r9
  24285. andq %rdx, %r10
  24286. subq %r10, %r9
  24287. cmova %r8, %rax
  24288. cmovc %rdx, %rax
  24289. cmovnz %rcx, %rdx
  24290. movq 24(%rdi), %r9
  24291. movq 24(%rsi), %r10
  24292. andq %rdx, %r9
  24293. andq %rdx, %r10
  24294. subq %r10, %r9
  24295. cmova %r8, %rax
  24296. cmovc %rdx, %rax
  24297. cmovnz %rcx, %rdx
  24298. movq 16(%rdi), %r9
  24299. movq 16(%rsi), %r10
  24300. andq %rdx, %r9
  24301. andq %rdx, %r10
  24302. subq %r10, %r9
  24303. cmova %r8, %rax
  24304. cmovc %rdx, %rax
  24305. cmovnz %rcx, %rdx
  24306. movq 8(%rdi), %r9
  24307. movq 8(%rsi), %r10
  24308. andq %rdx, %r9
  24309. andq %rdx, %r10
  24310. subq %r10, %r9
  24311. cmova %r8, %rax
  24312. cmovc %rdx, %rax
  24313. cmovnz %rcx, %rdx
  24314. movq (%rdi), %r9
  24315. movq (%rsi), %r10
  24316. andq %rdx, %r9
  24317. andq %rdx, %r10
  24318. subq %r10, %r9
  24319. cmova %r8, %rax
  24320. cmovc %rdx, %rax
  24321. cmovnz %rcx, %rdx
         # All words equal leaves %rax = %rdx = -1, so the xor returns 0;
         # otherwise %rdx is 0 and %rax (1 or -1) is returned unchanged.
  24322. xorq %rdx, %rax
  24323. repz retq
  24324. #ifndef __APPLE__
  24325. .size sp_3072_cmp_24,.-sp_3072_cmp_24
  24326. #endif /* __APPLE__ */
  24327. #ifdef HAVE_INTEL_AVX2
  24328. /* Reduce the number back to 3072 bits using Montgomery reduction.
  24329. *
  24330. * a A single precision number to reduce in place.
  24331. * m The single precision number representing the modulus.
  24332. * mp The digit representing the negative inverse of m mod 2^n.
         *
         * As used below: a is in %rdi, m in %rsi and mp in %rdx. The loop makes
         * 24 passes, each adding mu * m (24 words of m) into a[i..i+24]. After
         * the loop, words a[24..47] minus (m selected by the carry mask) are
         * written to a[0..23]. %r12-%r14 are saved and restored; %rax, %rcx,
         * %rdx, %r8-%r11 and the flags are overwritten. The routine uses the
         * mulx, adcx, adox and pext instructions.
  24333. */
  24334. #ifndef __APPLE__
  24335. .text
  24336. .globl sp_3072_mont_reduce_avx2_24
  24337. .type sp_3072_mont_reduce_avx2_24,@function
  24338. .align 16
  24339. sp_3072_mont_reduce_avx2_24:
  24340. #else
  24341. .section __TEXT,__text
  24342. .globl _sp_3072_mont_reduce_avx2_24
  24343. .p2align 4
  24344. _sp_3072_mont_reduce_avx2_24:
  24345. #endif /* __APPLE__ */
  24346. pushq %r12
  24347. pushq %r13
  24348. pushq %r14
         # mp moves to %r8 because mulx needs %rdx for mu.
  24349. movq %rdx, %r8
         # %r14 carries the overflow out of the top word from one pass to the next.
  24350. xorq %r14, %r14
  24351. # i = 24
  24352. movq $24, %r9
         # %r13 holds the current lowest word a[i]; it lives in a register and is
         # not written back to memory inside the loop.
  24353. movq (%rdi), %r13
         # Bias the pointer by 96 bytes so the loop addresses a[i+1]..a[i+24]
         # as -88(%rdi)..96(%rdi).
  24354. addq $0x60, %rdi
  24355. xorq %r12, %r12
  24356. L_mont_loop_avx2_24:
  24357. # mu = a[i] * mp
  24358. movq %r13, %rdx
  24359. movq %r13, %r10
  24360. imulq %r8, %rdx
         # Zero %r12 and clear CF and OF before starting the two carry chains.
  24361. xorq %r12, %r12
  24362. # a[i+0] += m[0] * mu
  24363. mulxq (%rsi), %rax, %rcx
         # Load a[i+1] into %r13; it becomes a[i] of the next pass.
  24364. movq -88(%rdi), %r13
         # The sum for word i is not stored; only its carry (CF) is used.
         # NOTE(review): presumably the sum is zero by the choice of mu - confirm.
  24365. adcxq %rax, %r10
  24366. adoxq %rcx, %r13
  24367. # a[i+1] += m[1] * mu
  24368. mulxq 8(%rsi), %rax, %rcx
  24369. movq -80(%rdi), %r10
  24370. adcxq %rax, %r13
  24371. adoxq %rcx, %r10
  24372. # a[i+2] += m[2] * mu
         # From here on %r10 and %r11 alternate: load the next word, adcx the low
         # half into the current word, adox the high half into the next word,
         # then store the current word.
  24373. mulxq 16(%rsi), %rax, %rcx
  24374. movq -72(%rdi), %r11
  24375. adcxq %rax, %r10
  24376. adoxq %rcx, %r11
  24377. movq %r10, -80(%rdi)
  24378. # a[i+3] += m[3] * mu
  24379. mulxq 24(%rsi), %rax, %rcx
  24380. movq -64(%rdi), %r10
  24381. adcxq %rax, %r11
  24382. adoxq %rcx, %r10
  24383. movq %r11, -72(%rdi)
  24384. # a[i+4] += m[4] * mu
  24385. mulxq 32(%rsi), %rax, %rcx
  24386. movq -56(%rdi), %r11
  24387. adcxq %rax, %r10
  24388. adoxq %rcx, %r11
  24389. movq %r10, -64(%rdi)
  24390. # a[i+5] += m[5] * mu
  24391. mulxq 40(%rsi), %rax, %rcx
  24392. movq -48(%rdi), %r10
  24393. adcxq %rax, %r11
  24394. adoxq %rcx, %r10
  24395. movq %r11, -56(%rdi)
  24396. # a[i+6] += m[6] * mu
  24397. mulxq 48(%rsi), %rax, %rcx
  24398. movq -40(%rdi), %r11
  24399. adcxq %rax, %r10
  24400. adoxq %rcx, %r11
  24401. movq %r10, -48(%rdi)
  24402. # a[i+7] += m[7] * mu
  24403. mulxq 56(%rsi), %rax, %rcx
  24404. movq -32(%rdi), %r10
  24405. adcxq %rax, %r11
  24406. adoxq %rcx, %r10
  24407. movq %r11, -40(%rdi)
  24408. # a[i+8] += m[8] * mu
  24409. mulxq 64(%rsi), %rax, %rcx
  24410. movq -24(%rdi), %r11
  24411. adcxq %rax, %r10
  24412. adoxq %rcx, %r11
  24413. movq %r10, -32(%rdi)
  24414. # a[i+9] += m[9] * mu
  24415. mulxq 72(%rsi), %rax, %rcx
  24416. movq -16(%rdi), %r10
  24417. adcxq %rax, %r11
  24418. adoxq %rcx, %r10
  24419. movq %r11, -24(%rdi)
  24420. # a[i+10] += m[10] * mu
  24421. mulxq 80(%rsi), %rax, %rcx
  24422. movq -8(%rdi), %r11
  24423. adcxq %rax, %r10
  24424. adoxq %rcx, %r11
  24425. movq %r10, -16(%rdi)
  24426. # a[i+11] += m[11] * mu
  24427. mulxq 88(%rsi), %rax, %rcx
  24428. movq (%rdi), %r10
  24429. adcxq %rax, %r11
  24430. adoxq %rcx, %r10
  24431. movq %r11, -8(%rdi)
  24432. # a[i+12] += m[12] * mu
  24433. mulxq 96(%rsi), %rax, %rcx
  24434. movq 8(%rdi), %r11
  24435. adcxq %rax, %r10
  24436. adoxq %rcx, %r11
  24437. movq %r10, (%rdi)
  24438. # a[i+13] += m[13] * mu
  24439. mulxq 104(%rsi), %rax, %rcx
  24440. movq 16(%rdi), %r10
  24441. adcxq %rax, %r11
  24442. adoxq %rcx, %r10
  24443. movq %r11, 8(%rdi)
  24444. # a[i+14] += m[14] * mu
  24445. mulxq 112(%rsi), %rax, %rcx
  24446. movq 24(%rdi), %r11
  24447. adcxq %rax, %r10
  24448. adoxq %rcx, %r11
  24449. movq %r10, 16(%rdi)
  24450. # a[i+15] += m[15] * mu
  24451. mulxq 120(%rsi), %rax, %rcx
  24452. movq 32(%rdi), %r10
  24453. adcxq %rax, %r11
  24454. adoxq %rcx, %r10
  24455. movq %r11, 24(%rdi)
  24456. # a[i+16] += m[16] * mu
  24457. mulxq 128(%rsi), %rax, %rcx
  24458. movq 40(%rdi), %r11
  24459. adcxq %rax, %r10
  24460. adoxq %rcx, %r11
  24461. movq %r10, 32(%rdi)
  24462. # a[i+17] += m[17] * mu
  24463. mulxq 136(%rsi), %rax, %rcx
  24464. movq 48(%rdi), %r10
  24465. adcxq %rax, %r11
  24466. adoxq %rcx, %r10
  24467. movq %r11, 40(%rdi)
  24468. # a[i+18] += m[18] * mu
  24469. mulxq 144(%rsi), %rax, %rcx
  24470. movq 56(%rdi), %r11
  24471. adcxq %rax, %r10
  24472. adoxq %rcx, %r11
  24473. movq %r10, 48(%rdi)
  24474. # a[i+19] += m[19] * mu
  24475. mulxq 152(%rsi), %rax, %rcx
  24476. movq 64(%rdi), %r10
  24477. adcxq %rax, %r11
  24478. adoxq %rcx, %r10
  24479. movq %r11, 56(%rdi)
  24480. # a[i+20] += m[20] * mu
  24481. mulxq 160(%rsi), %rax, %rcx
  24482. movq 72(%rdi), %r11
  24483. adcxq %rax, %r10
  24484. adoxq %rcx, %r11
  24485. movq %r10, 64(%rdi)
  24486. # a[i+21] += m[21] * mu
  24487. mulxq 168(%rsi), %rax, %rcx
  24488. movq 80(%rdi), %r10
  24489. adcxq %rax, %r11
  24490. adoxq %rcx, %r10
  24491. movq %r11, 72(%rdi)
  24492. # a[i+22] += m[22] * mu
  24493. mulxq 176(%rsi), %rax, %rcx
  24494. movq 88(%rdi), %r11
  24495. adcxq %rax, %r10
  24496. adoxq %rcx, %r11
  24497. movq %r10, 80(%rdi)
  24498. # a[i+23] += m[23] * mu
  24499. mulxq 184(%rsi), %rax, %rcx
  24500. movq 96(%rdi), %r10
  24501. adcxq %rax, %r11
  24502. adoxq %rcx, %r10
  24503. movq %r11, 88(%rdi)
         # Add the carry saved from the previous pass (plus CF) into a[i+24].
  24504. adcxq %r14, %r10
  24505. movq %r10, 96(%rdi)
         # Collect the final OF and CF of this pass into %r14 for the next one.
  24506. movq %r12, %r14
  24507. adoxq %r12, %r14
  24508. adcxq %r12, %r14
  24509. # a += 1
  24510. addq $8, %rdi
  24511. # i -= 1
  24512. subq $0x01, %r9
  24513. jnz L_mont_loop_avx2_24
         # Undo the 96-byte bias: %rdi now points at a[24].
  24514. subq $0x60, %rdi
         # Turn the carry into a selection mask.
         # NOTE(review): assumes the carry is 0 or 1, giving 0 or all ones - confirm.
  24515. negq %r14
         # %r8 = source (a[24..47]); %rdi moves back 0xc0 bytes to a[0..23].
  24516. movq %rdi, %r8
  24517. subq $0xc0, %rdi
  24518. movq (%rsi), %rcx
         # Word a[24] is taken from %r13 rather than from memory.
  24519. movq %r13, %rdx
         # With a mask of all ones pext passes m[j] through and with a zero mask
         # it yields 0. It leaves the flags alone, so the sub/sbb borrow chain
         # below survives the interleaved pext and mov instructions.
  24520. pextq %r14, %rcx, %rcx
  24521. subq %rcx, %rdx
  24522. movq 8(%rsi), %rcx
  24523. movq 8(%r8), %rax
  24524. pextq %r14, %rcx, %rcx
  24525. movq %rdx, (%rdi)
  24526. sbbq %rcx, %rax
  24527. movq 16(%rsi), %rdx
  24528. movq 16(%r8), %rcx
  24529. pextq %r14, %rdx, %rdx
  24530. movq %rax, 8(%rdi)
  24531. sbbq %rdx, %rcx
  24532. movq 24(%rsi), %rax
  24533. movq 24(%r8), %rdx
  24534. pextq %r14, %rax, %rax
  24535. movq %rcx, 16(%rdi)
  24536. sbbq %rax, %rdx
  24537. movq 32(%rsi), %rcx
  24538. movq 32(%r8), %rax
  24539. pextq %r14, %rcx, %rcx
  24540. movq %rdx, 24(%rdi)
  24541. sbbq %rcx, %rax
  24542. movq 40(%rsi), %rdx
  24543. movq 40(%r8), %rcx
  24544. pextq %r14, %rdx, %rdx
  24545. movq %rax, 32(%rdi)
  24546. sbbq %rdx, %rcx
  24547. movq 48(%rsi), %rax
  24548. movq 48(%r8), %rdx
  24549. pextq %r14, %rax, %rax
  24550. movq %rcx, 40(%rdi)
  24551. sbbq %rax, %rdx
  24552. movq 56(%rsi), %rcx
  24553. movq 56(%r8), %rax
  24554. pextq %r14, %rcx, %rcx
  24555. movq %rdx, 48(%rdi)
  24556. sbbq %rcx, %rax
  24557. movq 64(%rsi), %rdx
  24558. movq 64(%r8), %rcx
  24559. pextq %r14, %rdx, %rdx
  24560. movq %rax, 56(%rdi)
  24561. sbbq %rdx, %rcx
  24562. movq 72(%rsi), %rax
  24563. movq 72(%r8), %rdx
  24564. pextq %r14, %rax, %rax
  24565. movq %rcx, 64(%rdi)
  24566. sbbq %rax, %rdx
  24567. movq 80(%rsi), %rcx
  24568. movq 80(%r8), %rax
  24569. pextq %r14, %rcx, %rcx
  24570. movq %rdx, 72(%rdi)
  24571. sbbq %rcx, %rax
  24572. movq 88(%rsi), %rdx
  24573. movq 88(%r8), %rcx
  24574. pextq %r14, %rdx, %rdx
  24575. movq %rax, 80(%rdi)
  24576. sbbq %rdx, %rcx
  24577. movq 96(%rsi), %rax
  24578. movq 96(%r8), %rdx
  24579. pextq %r14, %rax, %rax
  24580. movq %rcx, 88(%rdi)
  24581. sbbq %rax, %rdx
  24582. movq 104(%rsi), %rcx
  24583. movq 104(%r8), %rax
  24584. pextq %r14, %rcx, %rcx
  24585. movq %rdx, 96(%rdi)
  24586. sbbq %rcx, %rax
  24587. movq 112(%rsi), %rdx
  24588. movq 112(%r8), %rcx
  24589. pextq %r14, %rdx, %rdx
  24590. movq %rax, 104(%rdi)
  24591. sbbq %rdx, %rcx
  24592. movq 120(%rsi), %rax
  24593. movq 120(%r8), %rdx
  24594. pextq %r14, %rax, %rax
  24595. movq %rcx, 112(%rdi)
  24596. sbbq %rax, %rdx
  24597. movq 128(%rsi), %rcx
  24598. movq 128(%r8), %rax
  24599. pextq %r14, %rcx, %rcx
  24600. movq %rdx, 120(%rdi)
  24601. sbbq %rcx, %rax
  24602. movq 136(%rsi), %rdx
  24603. movq 136(%r8), %rcx
  24604. pextq %r14, %rdx, %rdx
  24605. movq %rax, 128(%rdi)
  24606. sbbq %rdx, %rcx
  24607. movq 144(%rsi), %rax
  24608. movq 144(%r8), %rdx
  24609. pextq %r14, %rax, %rax
  24610. movq %rcx, 136(%rdi)
  24611. sbbq %rax, %rdx
  24612. movq 152(%rsi), %rcx
  24613. movq 152(%r8), %rax
  24614. pextq %r14, %rcx, %rcx
  24615. movq %rdx, 144(%rdi)
  24616. sbbq %rcx, %rax
  24617. movq 160(%rsi), %rdx
  24618. movq 160(%r8), %rcx
  24619. pextq %r14, %rdx, %rdx
  24620. movq %rax, 152(%rdi)
  24621. sbbq %rdx, %rcx
  24622. movq 168(%rsi), %rax
  24623. movq 168(%r8), %rdx
  24624. pextq %r14, %rax, %rax
  24625. movq %rcx, 160(%rdi)
  24626. sbbq %rax, %rdx
  24627. movq 176(%rsi), %rcx
  24628. movq 176(%r8), %rax
  24629. pextq %r14, %rcx, %rcx
  24630. movq %rdx, 168(%rdi)
  24631. sbbq %rcx, %rax
  24632. movq 184(%rsi), %rdx
  24633. movq 184(%r8), %rcx
  24634. pextq %r14, %rdx, %rdx
  24635. movq %rax, 176(%rdi)
  24636. sbbq %rdx, %rcx
  24637. movq %rcx, 184(%rdi)
         # Restore the saved registers in reverse push order.
  24638. popq %r14
  24639. popq %r13
  24640. popq %r12
  24641. repz retq
  24642. #ifndef __APPLE__
  24643. .size sp_3072_mont_reduce_avx2_24,.-sp_3072_mont_reduce_avx2_24
  24644. #endif /* __APPLE__ */
  24645. #endif /* HAVE_INTEL_AVX2 */
  24646. /* Conditionally subtract b from a using the mask m.
  24647. * m is -1 to subtract and 0 to leave a unchanged (r = a).
  24648. *
  24649. * r A single precision number representing condition subtract result.
  24650. * a A single precision number to subtract from.
  24651. * b A single precision number to subtract.
  24652. * m Mask value to apply.
         *
         * As used below: r is in %rdi, a in %rsi, b in %rdx and m in %rcx;
         * 48 words are processed. %rax returns 0 minus the final borrow, so it
         * is 0 or -1. The routine uses 0x180 bytes of stack as a temporary and
         * overwrites %rdx, %r8, %r9 and the flags.
  24653. */
  24654. #ifndef __APPLE__
  24655. .text
  24656. .globl sp_3072_cond_sub_48
  24657. .type sp_3072_cond_sub_48,@function
  24658. .align 16
  24659. sp_3072_cond_sub_48:
  24660. #else
  24661. .section __TEXT,__text
  24662. .globl _sp_3072_cond_sub_48
  24663. .p2align 4
  24664. _sp_3072_cond_sub_48:
  24665. #endif /* __APPLE__ */
         # Reserve 0x180 (48 * 8) bytes of stack for the masked copy of b.
  24666. subq $0x180, %rsp
  24667. movq $0x00, %rax
         # First phase: store b[i] AND m to the stack, two words per step.
  24668. movq (%rdx), %r8
  24669. movq 8(%rdx), %r9
  24670. andq %rcx, %r8
  24671. andq %rcx, %r9
  24672. movq %r8, (%rsp)
  24673. movq %r9, 8(%rsp)
  24674. movq 16(%rdx), %r8
  24675. movq 24(%rdx), %r9
  24676. andq %rcx, %r8
  24677. andq %rcx, %r9
  24678. movq %r8, 16(%rsp)
  24679. movq %r9, 24(%rsp)
  24680. movq 32(%rdx), %r8
  24681. movq 40(%rdx), %r9
  24682. andq %rcx, %r8
  24683. andq %rcx, %r9
  24684. movq %r8, 32(%rsp)
  24685. movq %r9, 40(%rsp)
  24686. movq 48(%rdx), %r8
  24687. movq 56(%rdx), %r9
  24688. andq %rcx, %r8
  24689. andq %rcx, %r9
  24690. movq %r8, 48(%rsp)
  24691. movq %r9, 56(%rsp)
  24692. movq 64(%rdx), %r8
  24693. movq 72(%rdx), %r9
  24694. andq %rcx, %r8
  24695. andq %rcx, %r9
  24696. movq %r8, 64(%rsp)
  24697. movq %r9, 72(%rsp)
  24698. movq 80(%rdx), %r8
  24699. movq 88(%rdx), %r9
  24700. andq %rcx, %r8
  24701. andq %rcx, %r9
  24702. movq %r8, 80(%rsp)
  24703. movq %r9, 88(%rsp)
  24704. movq 96(%rdx), %r8
  24705. movq 104(%rdx), %r9
  24706. andq %rcx, %r8
  24707. andq %rcx, %r9
  24708. movq %r8, 96(%rsp)
  24709. movq %r9, 104(%rsp)
  24710. movq 112(%rdx), %r8
  24711. movq 120(%rdx), %r9
  24712. andq %rcx, %r8
  24713. andq %rcx, %r9
  24714. movq %r8, 112(%rsp)
  24715. movq %r9, 120(%rsp)
  24716. movq 128(%rdx), %r8
  24717. movq 136(%rdx), %r9
  24718. andq %rcx, %r8
  24719. andq %rcx, %r9
  24720. movq %r8, 128(%rsp)
  24721. movq %r9, 136(%rsp)
  24722. movq 144(%rdx), %r8
  24723. movq 152(%rdx), %r9
  24724. andq %rcx, %r8
  24725. andq %rcx, %r9
  24726. movq %r8, 144(%rsp)
  24727. movq %r9, 152(%rsp)
  24728. movq 160(%rdx), %r8
  24729. movq 168(%rdx), %r9
  24730. andq %rcx, %r8
  24731. andq %rcx, %r9
  24732. movq %r8, 160(%rsp)
  24733. movq %r9, 168(%rsp)
  24734. movq 176(%rdx), %r8
  24735. movq 184(%rdx), %r9
  24736. andq %rcx, %r8
  24737. andq %rcx, %r9
  24738. movq %r8, 176(%rsp)
  24739. movq %r9, 184(%rsp)
  24740. movq 192(%rdx), %r8
  24741. movq 200(%rdx), %r9
  24742. andq %rcx, %r8
  24743. andq %rcx, %r9
  24744. movq %r8, 192(%rsp)
  24745. movq %r9, 200(%rsp)
  24746. movq 208(%rdx), %r8
  24747. movq 216(%rdx), %r9
  24748. andq %rcx, %r8
  24749. andq %rcx, %r9
  24750. movq %r8, 208(%rsp)
  24751. movq %r9, 216(%rsp)
  24752. movq 224(%rdx), %r8
  24753. movq 232(%rdx), %r9
  24754. andq %rcx, %r8
  24755. andq %rcx, %r9
  24756. movq %r8, 224(%rsp)
  24757. movq %r9, 232(%rsp)
  24758. movq 240(%rdx), %r8
  24759. movq 248(%rdx), %r9
  24760. andq %rcx, %r8
  24761. andq %rcx, %r9
  24762. movq %r8, 240(%rsp)
  24763. movq %r9, 248(%rsp)
  24764. movq 256(%rdx), %r8
  24765. movq 264(%rdx), %r9
  24766. andq %rcx, %r8
  24767. andq %rcx, %r9
  24768. movq %r8, 256(%rsp)
  24769. movq %r9, 264(%rsp)
  24770. movq 272(%rdx), %r8
  24771. movq 280(%rdx), %r9
  24772. andq %rcx, %r8
  24773. andq %rcx, %r9
  24774. movq %r8, 272(%rsp)
  24775. movq %r9, 280(%rsp)
  24776. movq 288(%rdx), %r8
  24777. movq 296(%rdx), %r9
  24778. andq %rcx, %r8
  24779. andq %rcx, %r9
  24780. movq %r8, 288(%rsp)
  24781. movq %r9, 296(%rsp)
  24782. movq 304(%rdx), %r8
  24783. movq 312(%rdx), %r9
  24784. andq %rcx, %r8
  24785. andq %rcx, %r9
  24786. movq %r8, 304(%rsp)
  24787. movq %r9, 312(%rsp)
  24788. movq 320(%rdx), %r8
  24789. movq 328(%rdx), %r9
  24790. andq %rcx, %r8
  24791. andq %rcx, %r9
  24792. movq %r8, 320(%rsp)
  24793. movq %r9, 328(%rsp)
  24794. movq 336(%rdx), %r8
  24795. movq 344(%rdx), %r9
  24796. andq %rcx, %r8
  24797. andq %rcx, %r9
  24798. movq %r8, 336(%rsp)
  24799. movq %r9, 344(%rsp)
  24800. movq 352(%rdx), %r8
  24801. movq 360(%rdx), %r9
  24802. andq %rcx, %r8
  24803. andq %rcx, %r9
  24804. movq %r8, 352(%rsp)
  24805. movq %r9, 360(%rsp)
  24806. movq 368(%rdx), %r8
  24807. movq 376(%rdx), %r9
  24808. andq %rcx, %r8
  24809. andq %rcx, %r9
  24810. movq %r8, 368(%rsp)
  24811. movq %r9, 376(%rsp)
         # Second phase: r = a - masked b as one borrow chain (sub, then sbb).
         # The interleaved movq loads and stores do not alter the flags.
         # %rdx is reused as scratch from here on; b is no longer needed.
  24812. movq (%rsi), %r8
  24813. movq (%rsp), %rdx
  24814. subq %rdx, %r8
  24815. movq 8(%rsi), %r9
  24816. movq 8(%rsp), %rdx
  24817. sbbq %rdx, %r9
  24818. movq %r8, (%rdi)
  24819. movq 16(%rsi), %r8
  24820. movq 16(%rsp), %rdx
  24821. sbbq %rdx, %r8
  24822. movq %r9, 8(%rdi)
  24823. movq 24(%rsi), %r9
  24824. movq 24(%rsp), %rdx
  24825. sbbq %rdx, %r9
  24826. movq %r8, 16(%rdi)
  24827. movq 32(%rsi), %r8
  24828. movq 32(%rsp), %rdx
  24829. sbbq %rdx, %r8
  24830. movq %r9, 24(%rdi)
  24831. movq 40(%rsi), %r9
  24832. movq 40(%rsp), %rdx
  24833. sbbq %rdx, %r9
  24834. movq %r8, 32(%rdi)
  24835. movq 48(%rsi), %r8
  24836. movq 48(%rsp), %rdx
  24837. sbbq %rdx, %r8
  24838. movq %r9, 40(%rdi)
  24839. movq 56(%rsi), %r9
  24840. movq 56(%rsp), %rdx
  24841. sbbq %rdx, %r9
  24842. movq %r8, 48(%rdi)
  24843. movq 64(%rsi), %r8
  24844. movq 64(%rsp), %rdx
  24845. sbbq %rdx, %r8
  24846. movq %r9, 56(%rdi)
  24847. movq 72(%rsi), %r9
  24848. movq 72(%rsp), %rdx
  24849. sbbq %rdx, %r9
  24850. movq %r8, 64(%rdi)
  24851. movq 80(%rsi), %r8
  24852. movq 80(%rsp), %rdx
  24853. sbbq %rdx, %r8
  24854. movq %r9, 72(%rdi)
  24855. movq 88(%rsi), %r9
  24856. movq 88(%rsp), %rdx
  24857. sbbq %rdx, %r9
  24858. movq %r8, 80(%rdi)
  24859. movq 96(%rsi), %r8
  24860. movq 96(%rsp), %rdx
  24861. sbbq %rdx, %r8
  24862. movq %r9, 88(%rdi)
  24863. movq 104(%rsi), %r9
  24864. movq 104(%rsp), %rdx
  24865. sbbq %rdx, %r9
  24866. movq %r8, 96(%rdi)
  24867. movq 112(%rsi), %r8
  24868. movq 112(%rsp), %rdx
  24869. sbbq %rdx, %r8
  24870. movq %r9, 104(%rdi)
  24871. movq 120(%rsi), %r9
  24872. movq 120(%rsp), %rdx
  24873. sbbq %rdx, %r9
  24874. movq %r8, 112(%rdi)
  24875. movq 128(%rsi), %r8
  24876. movq 128(%rsp), %rdx
  24877. sbbq %rdx, %r8
  24878. movq %r9, 120(%rdi)
  24879. movq 136(%rsi), %r9
  24880. movq 136(%rsp), %rdx
  24881. sbbq %rdx, %r9
  24882. movq %r8, 128(%rdi)
  24883. movq 144(%rsi), %r8
  24884. movq 144(%rsp), %rdx
  24885. sbbq %rdx, %r8
  24886. movq %r9, 136(%rdi)
  24887. movq 152(%rsi), %r9
  24888. movq 152(%rsp), %rdx
  24889. sbbq %rdx, %r9
  24890. movq %r8, 144(%rdi)
  24891. movq 160(%rsi), %r8
  24892. movq 160(%rsp), %rdx
  24893. sbbq %rdx, %r8
  24894. movq %r9, 152(%rdi)
  24895. movq 168(%rsi), %r9
  24896. movq 168(%rsp), %rdx
  24897. sbbq %rdx, %r9
  24898. movq %r8, 160(%rdi)
  24899. movq 176(%rsi), %r8
  24900. movq 176(%rsp), %rdx
  24901. sbbq %rdx, %r8
  24902. movq %r9, 168(%rdi)
  24903. movq 184(%rsi), %r9
  24904. movq 184(%rsp), %rdx
  24905. sbbq %rdx, %r9
  24906. movq %r8, 176(%rdi)
  24907. movq 192(%rsi), %r8
  24908. movq 192(%rsp), %rdx
  24909. sbbq %rdx, %r8
  24910. movq %r9, 184(%rdi)
  24911. movq 200(%rsi), %r9
  24912. movq 200(%rsp), %rdx
  24913. sbbq %rdx, %r9
  24914. movq %r8, 192(%rdi)
  24915. movq 208(%rsi), %r8
  24916. movq 208(%rsp), %rdx
  24917. sbbq %rdx, %r8
  24918. movq %r9, 200(%rdi)
  24919. movq 216(%rsi), %r9
  24920. movq 216(%rsp), %rdx
  24921. sbbq %rdx, %r9
  24922. movq %r8, 208(%rdi)
  24923. movq 224(%rsi), %r8
  24924. movq 224(%rsp), %rdx
  24925. sbbq %rdx, %r8
  24926. movq %r9, 216(%rdi)
  24927. movq 232(%rsi), %r9
  24928. movq 232(%rsp), %rdx
  24929. sbbq %rdx, %r9
  24930. movq %r8, 224(%rdi)
  24931. movq 240(%rsi), %r8
  24932. movq 240(%rsp), %rdx
  24933. sbbq %rdx, %r8
  24934. movq %r9, 232(%rdi)
  24935. movq 248(%rsi), %r9
  24936. movq 248(%rsp), %rdx
  24937. sbbq %rdx, %r9
  24938. movq %r8, 240(%rdi)
  24939. movq 256(%rsi), %r8
  24940. movq 256(%rsp), %rdx
  24941. sbbq %rdx, %r8
  24942. movq %r9, 248(%rdi)
  24943. movq 264(%rsi), %r9
  24944. movq 264(%rsp), %rdx
  24945. sbbq %rdx, %r9
  24946. movq %r8, 256(%rdi)
  24947. movq 272(%rsi), %r8
  24948. movq 272(%rsp), %rdx
  24949. sbbq %rdx, %r8
  24950. movq %r9, 264(%rdi)
  24951. movq 280(%rsi), %r9
  24952. movq 280(%rsp), %rdx
  24953. sbbq %rdx, %r9
  24954. movq %r8, 272(%rdi)
  24955. movq 288(%rsi), %r8
  24956. movq 288(%rsp), %rdx
  24957. sbbq %rdx, %r8
  24958. movq %r9, 280(%rdi)
  24959. movq 296(%rsi), %r9
  24960. movq 296(%rsp), %rdx
  24961. sbbq %rdx, %r9
  24962. movq %r8, 288(%rdi)
  24963. movq 304(%rsi), %r8
  24964. movq 304(%rsp), %rdx
  24965. sbbq %rdx, %r8
  24966. movq %r9, 296(%rdi)
  24967. movq 312(%rsi), %r9
  24968. movq 312(%rsp), %rdx
  24969. sbbq %rdx, %r9
  24970. movq %r8, 304(%rdi)
  24971. movq 320(%rsi), %r8
  24972. movq 320(%rsp), %rdx
  24973. sbbq %rdx, %r8
  24974. movq %r9, 312(%rdi)
  24975. movq 328(%rsi), %r9
  24976. movq 328(%rsp), %rdx
  24977. sbbq %rdx, %r9
  24978. movq %r8, 320(%rdi)
  24979. movq 336(%rsi), %r8
  24980. movq 336(%rsp), %rdx
  24981. sbbq %rdx, %r8
  24982. movq %r9, 328(%rdi)
  24983. movq 344(%rsi), %r9
  24984. movq 344(%rsp), %rdx
  24985. sbbq %rdx, %r9
  24986. movq %r8, 336(%rdi)
  24987. movq 352(%rsi), %r8
  24988. movq 352(%rsp), %rdx
  24989. sbbq %rdx, %r8
  24990. movq %r9, 344(%rdi)
  24991. movq 360(%rsi), %r9
  24992. movq 360(%rsp), %rdx
  24993. sbbq %rdx, %r9
  24994. movq %r8, 352(%rdi)
  24995. movq 368(%rsi), %r8
  24996. movq 368(%rsp), %rdx
  24997. sbbq %rdx, %r8
  24998. movq %r9, 360(%rdi)
  24999. movq 376(%rsi), %r9
  25000. movq 376(%rsp), %rdx
  25001. sbbq %rdx, %r9
  25002. movq %r8, 368(%rdi)
  25003. movq %r9, 376(%rdi)
         # Return value: 0 minus the final borrow, giving 0 or -1 in %rax.
  25004. sbbq $0x00, %rax
         # Release the temporary; the flag change here is harmless because
         # %rax has already been computed.
  25005. addq $0x180, %rsp
  25006. repz retq
  25007. #ifndef __APPLE__
  25008. .size sp_3072_cond_sub_48,.-sp_3072_cond_sub_48
  25009. #endif /* __APPLE__ */
  25010. /* Reduce the number back to 3072 bits using Montgomery reduction.
  25011. *
  25012. * a A single precision number to reduce in place.
  25013. * m The single precision number representing the modulus.
  25014. * mp The digit representing the negative inverse of m mod 2^n.
  25015. */
  25016. #ifndef __APPLE__
  25017. .text
  25018. .globl sp_3072_mont_reduce_48
  25019. .type sp_3072_mont_reduce_48,@function
  25020. .align 16
  25021. sp_3072_mont_reduce_48:
  25022. #else
  25023. .section __TEXT,__text
  25024. .globl _sp_3072_mont_reduce_48
  25025. .p2align 4
  25026. _sp_3072_mont_reduce_48:
  25027. #endif /* __APPLE__ */
  25028. pushq %r12
  25029. pushq %r13
  25030. pushq %r14
  25031. pushq %r15
  25032. movq %rdx, %rcx
  25033. xorq %r15, %r15
  25034. # i = 48
  25035. movq $48, %r8
  25036. movq (%rdi), %r13
  25037. movq 8(%rdi), %r14
  25038. L_mont_loop_48:
  25039. # mu = a[i] * mp
  25040. movq %r13, %r11
  25041. imulq %rcx, %r11
  25042. # a[i+0] += m[0] * mu
  25043. movq %r11, %rax
  25044. xorq %r10, %r10
  25045. mulq (%rsi)
  25046. addq %rax, %r13
  25047. adcq %rdx, %r10
  25048. # a[i+1] += m[1] * mu
  25049. movq %r11, %rax
  25050. xorq %r9, %r9
  25051. mulq 8(%rsi)
  25052. movq %r14, %r13
  25053. addq %rax, %r13
  25054. adcq %rdx, %r9
  25055. addq %r10, %r13
  25056. adcq $0x00, %r9
  25057. # a[i+2] += m[2] * mu
  25058. movq %r11, %rax
  25059. xorq %r10, %r10
  25060. mulq 16(%rsi)
  25061. movq 16(%rdi), %r14
  25062. addq %rax, %r14
  25063. adcq %rdx, %r10
  25064. addq %r9, %r14
  25065. adcq $0x00, %r10
  25066. # a[i+3] += m[3] * mu
  25067. movq %r11, %rax
  25068. xorq %r9, %r9
  25069. mulq 24(%rsi)
  25070. movq 24(%rdi), %r12
  25071. addq %rax, %r12
  25072. adcq %rdx, %r9
  25073. addq %r10, %r12
  25074. movq %r12, 24(%rdi)
  25075. adcq $0x00, %r9
  25076. # a[i+4] += m[4] * mu
  25077. movq %r11, %rax
  25078. xorq %r10, %r10
  25079. mulq 32(%rsi)
  25080. movq 32(%rdi), %r12
  25081. addq %rax, %r12
  25082. adcq %rdx, %r10
  25083. addq %r9, %r12
  25084. movq %r12, 32(%rdi)
  25085. adcq $0x00, %r10
  25086. # a[i+5] += m[5] * mu
  25087. movq %r11, %rax
  25088. xorq %r9, %r9
  25089. mulq 40(%rsi)
  25090. movq 40(%rdi), %r12
  25091. addq %rax, %r12
  25092. adcq %rdx, %r9
  25093. addq %r10, %r12
  25094. movq %r12, 40(%rdi)
  25095. adcq $0x00, %r9
  25096. # a[i+6] += m[6] * mu
  25097. movq %r11, %rax
  25098. xorq %r10, %r10
  25099. mulq 48(%rsi)
  25100. movq 48(%rdi), %r12
  25101. addq %rax, %r12
  25102. adcq %rdx, %r10
  25103. addq %r9, %r12
  25104. movq %r12, 48(%rdi)
  25105. adcq $0x00, %r10
  25106. # a[i+7] += m[7] * mu
  25107. movq %r11, %rax
  25108. xorq %r9, %r9
  25109. mulq 56(%rsi)
  25110. movq 56(%rdi), %r12
  25111. addq %rax, %r12
  25112. adcq %rdx, %r9
  25113. addq %r10, %r12
  25114. movq %r12, 56(%rdi)
  25115. adcq $0x00, %r9
  25116. # a[i+8] += m[8] * mu
  25117. movq %r11, %rax
  25118. xorq %r10, %r10
  25119. mulq 64(%rsi)
  25120. movq 64(%rdi), %r12
  25121. addq %rax, %r12
  25122. adcq %rdx, %r10
  25123. addq %r9, %r12
  25124. movq %r12, 64(%rdi)
  25125. adcq $0x00, %r10
  25126. # a[i+9] += m[9] * mu
  25127. movq %r11, %rax
  25128. xorq %r9, %r9
  25129. mulq 72(%rsi)
  25130. movq 72(%rdi), %r12
  25131. addq %rax, %r12
  25132. adcq %rdx, %r9
  25133. addq %r10, %r12
  25134. movq %r12, 72(%rdi)
  25135. adcq $0x00, %r9
  25136. # a[i+10] += m[10] * mu
  25137. movq %r11, %rax
  25138. xorq %r10, %r10
  25139. mulq 80(%rsi)
  25140. movq 80(%rdi), %r12
  25141. addq %rax, %r12
  25142. adcq %rdx, %r10
  25143. addq %r9, %r12
  25144. movq %r12, 80(%rdi)
  25145. adcq $0x00, %r10
  25146. # a[i+11] += m[11] * mu
  25147. movq %r11, %rax
  25148. xorq %r9, %r9
  25149. mulq 88(%rsi)
  25150. movq 88(%rdi), %r12
  25151. addq %rax, %r12
  25152. adcq %rdx, %r9
  25153. addq %r10, %r12
  25154. movq %r12, 88(%rdi)
  25155. adcq $0x00, %r9
  25156. # a[i+12] += m[12] * mu
  25157. movq %r11, %rax
  25158. xorq %r10, %r10
  25159. mulq 96(%rsi)
  25160. movq 96(%rdi), %r12
  25161. addq %rax, %r12
  25162. adcq %rdx, %r10
  25163. addq %r9, %r12
  25164. movq %r12, 96(%rdi)
  25165. adcq $0x00, %r10
  25166. # a[i+13] += m[13] * mu
  25167. movq %r11, %rax
  25168. xorq %r9, %r9
  25169. mulq 104(%rsi)
  25170. movq 104(%rdi), %r12
  25171. addq %rax, %r12
  25172. adcq %rdx, %r9
  25173. addq %r10, %r12
  25174. movq %r12, 104(%rdi)
  25175. adcq $0x00, %r9
  25176. # a[i+14] += m[14] * mu
  25177. movq %r11, %rax
  25178. xorq %r10, %r10
  25179. mulq 112(%rsi)
  25180. movq 112(%rdi), %r12
  25181. addq %rax, %r12
  25182. adcq %rdx, %r10
  25183. addq %r9, %r12
  25184. movq %r12, 112(%rdi)
  25185. adcq $0x00, %r10
  25186. # a[i+15] += m[15] * mu
  25187. movq %r11, %rax
  25188. xorq %r9, %r9
  25189. mulq 120(%rsi)
  25190. movq 120(%rdi), %r12
  25191. addq %rax, %r12
  25192. adcq %rdx, %r9
  25193. addq %r10, %r12
  25194. movq %r12, 120(%rdi)
  25195. adcq $0x00, %r9
  25196. # a[i+16] += m[16] * mu
  25197. movq %r11, %rax
  25198. xorq %r10, %r10
  25199. mulq 128(%rsi)
  25200. movq 128(%rdi), %r12
  25201. addq %rax, %r12
  25202. adcq %rdx, %r10
  25203. addq %r9, %r12
  25204. movq %r12, 128(%rdi)
  25205. adcq $0x00, %r10
  25206. # a[i+17] += m[17] * mu
  25207. movq %r11, %rax
  25208. xorq %r9, %r9
  25209. mulq 136(%rsi)
  25210. movq 136(%rdi), %r12
  25211. addq %rax, %r12
  25212. adcq %rdx, %r9
  25213. addq %r10, %r12
  25214. movq %r12, 136(%rdi)
  25215. adcq $0x00, %r9
  25216. # a[i+18] += m[18] * mu
  25217. movq %r11, %rax
  25218. xorq %r10, %r10
  25219. mulq 144(%rsi)
  25220. movq 144(%rdi), %r12
  25221. addq %rax, %r12
  25222. adcq %rdx, %r10
  25223. addq %r9, %r12
  25224. movq %r12, 144(%rdi)
  25225. adcq $0x00, %r10
  25226. # a[i+19] += m[19] * mu
  25227. movq %r11, %rax
  25228. xorq %r9, %r9
  25229. mulq 152(%rsi)
  25230. movq 152(%rdi), %r12
  25231. addq %rax, %r12
  25232. adcq %rdx, %r9
  25233. addq %r10, %r12
  25234. movq %r12, 152(%rdi)
  25235. adcq $0x00, %r9
  25236. # a[i+20] += m[20] * mu
  25237. movq %r11, %rax
  25238. xorq %r10, %r10
  25239. mulq 160(%rsi)
  25240. movq 160(%rdi), %r12
  25241. addq %rax, %r12
  25242. adcq %rdx, %r10
  25243. addq %r9, %r12
  25244. movq %r12, 160(%rdi)
  25245. adcq $0x00, %r10
  25246. # a[i+21] += m[21] * mu
  25247. movq %r11, %rax
  25248. xorq %r9, %r9
  25249. mulq 168(%rsi)
  25250. movq 168(%rdi), %r12
  25251. addq %rax, %r12
  25252. adcq %rdx, %r9
  25253. addq %r10, %r12
  25254. movq %r12, 168(%rdi)
  25255. adcq $0x00, %r9
  25256. # a[i+22] += m[22] * mu
  25257. movq %r11, %rax
  25258. xorq %r10, %r10
  25259. mulq 176(%rsi)
  25260. movq 176(%rdi), %r12
  25261. addq %rax, %r12
  25262. adcq %rdx, %r10
  25263. addq %r9, %r12
  25264. movq %r12, 176(%rdi)
  25265. adcq $0x00, %r10
  25266. # a[i+23] += m[23] * mu
  25267. movq %r11, %rax
  25268. xorq %r9, %r9
  25269. mulq 184(%rsi)
  25270. movq 184(%rdi), %r12
  25271. addq %rax, %r12
  25272. adcq %rdx, %r9
  25273. addq %r10, %r12
  25274. movq %r12, 184(%rdi)
  25275. adcq $0x00, %r9
  25276. # a[i+24] += m[24] * mu
  25277. movq %r11, %rax
  25278. xorq %r10, %r10
  25279. mulq 192(%rsi)
  25280. movq 192(%rdi), %r12
  25281. addq %rax, %r12
  25282. adcq %rdx, %r10
  25283. addq %r9, %r12
  25284. movq %r12, 192(%rdi)
  25285. adcq $0x00, %r10
  25286. # a[i+25] += m[25] * mu
  25287. movq %r11, %rax
  25288. xorq %r9, %r9
  25289. mulq 200(%rsi)
  25290. movq 200(%rdi), %r12
  25291. addq %rax, %r12
  25292. adcq %rdx, %r9
  25293. addq %r10, %r12
  25294. movq %r12, 200(%rdi)
  25295. adcq $0x00, %r9
  25296. # a[i+26] += m[26] * mu
  25297. movq %r11, %rax
  25298. xorq %r10, %r10
  25299. mulq 208(%rsi)
  25300. movq 208(%rdi), %r12
  25301. addq %rax, %r12
  25302. adcq %rdx, %r10
  25303. addq %r9, %r12
  25304. movq %r12, 208(%rdi)
  25305. adcq $0x00, %r10
  25306. # a[i+27] += m[27] * mu
  25307. movq %r11, %rax
  25308. xorq %r9, %r9
  25309. mulq 216(%rsi)
  25310. movq 216(%rdi), %r12
  25311. addq %rax, %r12
  25312. adcq %rdx, %r9
  25313. addq %r10, %r12
  25314. movq %r12, 216(%rdi)
  25315. adcq $0x00, %r9
  25316. # a[i+28] += m[28] * mu
  25317. movq %r11, %rax
  25318. xorq %r10, %r10
  25319. mulq 224(%rsi)
  25320. movq 224(%rdi), %r12
  25321. addq %rax, %r12
  25322. adcq %rdx, %r10
  25323. addq %r9, %r12
  25324. movq %r12, 224(%rdi)
  25325. adcq $0x00, %r10
  25326. # a[i+29] += m[29] * mu
  25327. movq %r11, %rax
  25328. xorq %r9, %r9
  25329. mulq 232(%rsi)
  25330. movq 232(%rdi), %r12
  25331. addq %rax, %r12
  25332. adcq %rdx, %r9
  25333. addq %r10, %r12
  25334. movq %r12, 232(%rdi)
  25335. adcq $0x00, %r9
  25336. # a[i+30] += m[30] * mu
  25337. movq %r11, %rax
  25338. xorq %r10, %r10
  25339. mulq 240(%rsi)
  25340. movq 240(%rdi), %r12
  25341. addq %rax, %r12
  25342. adcq %rdx, %r10
  25343. addq %r9, %r12
  25344. movq %r12, 240(%rdi)
  25345. adcq $0x00, %r10
  25346. # a[i+31] += m[31] * mu
  25347. movq %r11, %rax
  25348. xorq %r9, %r9
  25349. mulq 248(%rsi)
  25350. movq 248(%rdi), %r12
  25351. addq %rax, %r12
  25352. adcq %rdx, %r9
  25353. addq %r10, %r12
  25354. movq %r12, 248(%rdi)
  25355. adcq $0x00, %r9
  25356. # a[i+32] += m[32] * mu
  25357. movq %r11, %rax
  25358. xorq %r10, %r10
  25359. mulq 256(%rsi)
  25360. movq 256(%rdi), %r12
  25361. addq %rax, %r12
  25362. adcq %rdx, %r10
  25363. addq %r9, %r12
  25364. movq %r12, 256(%rdi)
  25365. adcq $0x00, %r10
  25366. # a[i+33] += m[33] * mu
  25367. movq %r11, %rax
  25368. xorq %r9, %r9
  25369. mulq 264(%rsi)
  25370. movq 264(%rdi), %r12
  25371. addq %rax, %r12
  25372. adcq %rdx, %r9
  25373. addq %r10, %r12
  25374. movq %r12, 264(%rdi)
  25375. adcq $0x00, %r9
  25376. # a[i+34] += m[34] * mu
  25377. movq %r11, %rax
  25378. xorq %r10, %r10
  25379. mulq 272(%rsi)
  25380. movq 272(%rdi), %r12
  25381. addq %rax, %r12
  25382. adcq %rdx, %r10
  25383. addq %r9, %r12
  25384. movq %r12, 272(%rdi)
  25385. adcq $0x00, %r10
  25386. # a[i+35] += m[35] * mu
  25387. movq %r11, %rax
  25388. xorq %r9, %r9
  25389. mulq 280(%rsi)
  25390. movq 280(%rdi), %r12
  25391. addq %rax, %r12
  25392. adcq %rdx, %r9
  25393. addq %r10, %r12
  25394. movq %r12, 280(%rdi)
  25395. adcq $0x00, %r9
  25396. # a[i+36] += m[36] * mu
  25397. movq %r11, %rax
  25398. xorq %r10, %r10
  25399. mulq 288(%rsi)
  25400. movq 288(%rdi), %r12
  25401. addq %rax, %r12
  25402. adcq %rdx, %r10
  25403. addq %r9, %r12
  25404. movq %r12, 288(%rdi)
  25405. adcq $0x00, %r10
  25406. # a[i+37] += m[37] * mu
  25407. movq %r11, %rax
  25408. xorq %r9, %r9
  25409. mulq 296(%rsi)
  25410. movq 296(%rdi), %r12
  25411. addq %rax, %r12
  25412. adcq %rdx, %r9
  25413. addq %r10, %r12
  25414. movq %r12, 296(%rdi)
  25415. adcq $0x00, %r9
  25416. # a[i+38] += m[38] * mu
  25417. movq %r11, %rax
  25418. xorq %r10, %r10
  25419. mulq 304(%rsi)
  25420. movq 304(%rdi), %r12
  25421. addq %rax, %r12
  25422. adcq %rdx, %r10
  25423. addq %r9, %r12
  25424. movq %r12, 304(%rdi)
  25425. adcq $0x00, %r10
  25426. # a[i+39] += m[39] * mu
  25427. movq %r11, %rax
  25428. xorq %r9, %r9
  25429. mulq 312(%rsi)
  25430. movq 312(%rdi), %r12
  25431. addq %rax, %r12
  25432. adcq %rdx, %r9
  25433. addq %r10, %r12
  25434. movq %r12, 312(%rdi)
  25435. adcq $0x00, %r9
  25436. # a[i+40] += m[40] * mu
  25437. movq %r11, %rax
  25438. xorq %r10, %r10
  25439. mulq 320(%rsi)
  25440. movq 320(%rdi), %r12
  25441. addq %rax, %r12
  25442. adcq %rdx, %r10
  25443. addq %r9, %r12
  25444. movq %r12, 320(%rdi)
  25445. adcq $0x00, %r10
  25446. # a[i+41] += m[41] * mu
  25447. movq %r11, %rax
  25448. xorq %r9, %r9
  25449. mulq 328(%rsi)
  25450. movq 328(%rdi), %r12
  25451. addq %rax, %r12
  25452. adcq %rdx, %r9
  25453. addq %r10, %r12
  25454. movq %r12, 328(%rdi)
  25455. adcq $0x00, %r9
  25456. # a[i+42] += m[42] * mu
  25457. movq %r11, %rax
  25458. xorq %r10, %r10
  25459. mulq 336(%rsi)
  25460. movq 336(%rdi), %r12
  25461. addq %rax, %r12
  25462. adcq %rdx, %r10
  25463. addq %r9, %r12
  25464. movq %r12, 336(%rdi)
  25465. adcq $0x00, %r10
  25466. # a[i+43] += m[43] * mu
  25467. movq %r11, %rax
  25468. xorq %r9, %r9
  25469. mulq 344(%rsi)
  25470. movq 344(%rdi), %r12
  25471. addq %rax, %r12
  25472. adcq %rdx, %r9
  25473. addq %r10, %r12
  25474. movq %r12, 344(%rdi)
  25475. adcq $0x00, %r9
  25476. # a[i+44] += m[44] * mu
  25477. movq %r11, %rax
  25478. xorq %r10, %r10
  25479. mulq 352(%rsi)
  25480. movq 352(%rdi), %r12
  25481. addq %rax, %r12
  25482. adcq %rdx, %r10
  25483. addq %r9, %r12
  25484. movq %r12, 352(%rdi)
  25485. adcq $0x00, %r10
  25486. # a[i+45] += m[45] * mu
  25487. movq %r11, %rax
  25488. xorq %r9, %r9
  25489. mulq 360(%rsi)
  25490. movq 360(%rdi), %r12
  25491. addq %rax, %r12
  25492. adcq %rdx, %r9
  25493. addq %r10, %r12
  25494. movq %r12, 360(%rdi)
  25495. adcq $0x00, %r9
  25496. # a[i+46] += m[46] * mu
  25497. movq %r11, %rax
  25498. xorq %r10, %r10
  25499. mulq 368(%rsi)
  25500. movq 368(%rdi), %r12
  25501. addq %rax, %r12
  25502. adcq %rdx, %r10
  25503. addq %r9, %r12
  25504. movq %r12, 368(%rdi)
  25505. adcq $0x00, %r10
  25506. # a[i+47] += m[47] * mu
  25507. movq %r11, %rax
  25508. mulq 376(%rsi)
  25509. movq 376(%rdi), %r12
  25510. addq %rax, %r10
  25511. adcq %r15, %rdx
  25512. movq $0x00, %r15
  25513. adcq $0x00, %r15
  25514. addq %r10, %r12
  25515. movq %r12, 376(%rdi)
  25516. adcq %rdx, 384(%rdi)
  25517. adcq $0x00, %r15
  25518. # i -= 1
  25519. addq $8, %rdi
  25520. decq %r8
  25521. jnz L_mont_loop_48
  25522. movq %r13, (%rdi)
  25523. movq %r14, 8(%rdi)
  25524. negq %r15
  25525. movq %r15, %rcx
  25526. movq %rsi, %rdx
  25527. movq %rdi, %rsi
  25528. movq %rdi, %rdi
  25529. subq $0x180, %rdi
  25530. #ifndef __APPLE__
  25531. callq sp_3072_cond_sub_48@plt
  25532. #else
  25533. callq _sp_3072_cond_sub_48
  25534. #endif /* __APPLE__ */
  25535. popq %r15
  25536. popq %r14
  25537. popq %r13
  25538. popq %r12
  25539. repz retq
  25540. #ifndef __APPLE__
  25541. .size sp_3072_mont_reduce_48,.-sp_3072_mont_reduce_48
  25542. #endif /* __APPLE__ */
  25543. /* Conditionally subtract b from a using the mask m.
  25544. * m is -1 to subtract and 0 when not subtracting.
  25545. *
  25546. * r A single precision number representing condition subtract result.
  25547. * a A single precision number to subtract from.
  25548. * b A single precision number to subtract.
  25549. * m Mask value to apply.
  *
  * Register use (System V AMD64 argument order):
  *   %rdi = r  - 48 64-bit words are stored (offsets 0..376).
  *   %rsi = a  - 48 64-bit words are loaded.
  *   %rdx = b  - 48 64-bit words are loaded.
  *   %rcx = m  - used as the pextq mask for every word of b.
  * Returns in %rax: 0 when the 48-word subtraction produced no final
  * borrow, -1 (all ones) when it did.
  * Clobbers %r8, %r9, %r10 and the flags; no stack is used.
  *
  * Each word of b is passed through "pextq m, b[i]": with m = -1 the word
  * is unchanged and with m = 0 it becomes 0, so the same instruction
  * sequence runs for both cases. Other mask values do not give a plain
  * AND (pext compacts the selected bits), so m is expected to be 0 or -1
  * as stated above.
  * The borrow is carried in CF from the initial subq through all of the
  * sbbq instructions; the movq and pextq instructions placed between them
  * do not modify the flags.
  *
  * NOTE(review): pextq is a BMI2 instruction and no HAVE_INTEL_AVX2 guard
  * is visible around this routine in the surrounding lines - confirm it
  * is only called when BMI2 is available.
  25550. */
  25551. #ifndef __APPLE__
  25552. .text
  25553. .globl sp_3072_cond_sub_avx2_48
  25554. .type sp_3072_cond_sub_avx2_48,@function
  25555. .align 16
  25556. sp_3072_cond_sub_avx2_48:
  25557. #else
  25558. .section __TEXT,__text
  25559. .globl _sp_3072_cond_sub_avx2_48
  25560. .p2align 4
  25561. _sp_3072_cond_sub_avx2_48:
  25562. #endif /* __APPLE__ */
  /* rax = 0 now (movq leaves the flags alone); it receives the final borrow
   * at the end of the routine. */
  25563. movq $0x00, %rax
  /* Word 0: a plain subq starts the borrow chain. */
  25564. movq (%rdx), %r10
  25565. movq (%rsi), %r8
  25566. pextq %rcx, %r10, %r10
  25567. subq %r10, %r8
  /* Words 1..47: load b[i] and a[i], mask b[i], store the previous result
   * word, then sbbq to continue the borrow chain. The three scratch
   * registers %r8, %r9, %r10 rotate roles from word to word. */
  25568. movq 8(%rdx), %r10
  25569. movq 8(%rsi), %r9
  25570. pextq %rcx, %r10, %r10
  25571. movq %r8, (%rdi)
  25572. sbbq %r10, %r9
  25573. movq 16(%rdx), %r8
  25574. movq 16(%rsi), %r10
  25575. pextq %rcx, %r8, %r8
  25576. movq %r9, 8(%rdi)
  25577. sbbq %r8, %r10
  25578. movq 24(%rdx), %r9
  25579. movq 24(%rsi), %r8
  25580. pextq %rcx, %r9, %r9
  25581. movq %r10, 16(%rdi)
  25582. sbbq %r9, %r8
  25583. movq 32(%rdx), %r10
  25584. movq 32(%rsi), %r9
  25585. pextq %rcx, %r10, %r10
  25586. movq %r8, 24(%rdi)
  25587. sbbq %r10, %r9
  25588. movq 40(%rdx), %r8
  25589. movq 40(%rsi), %r10
  25590. pextq %rcx, %r8, %r8
  25591. movq %r9, 32(%rdi)
  25592. sbbq %r8, %r10
  25593. movq 48(%rdx), %r9
  25594. movq 48(%rsi), %r8
  25595. pextq %rcx, %r9, %r9
  25596. movq %r10, 40(%rdi)
  25597. sbbq %r9, %r8
  25598. movq 56(%rdx), %r10
  25599. movq 56(%rsi), %r9
  25600. pextq %rcx, %r10, %r10
  25601. movq %r8, 48(%rdi)
  25602. sbbq %r10, %r9
  25603. movq 64(%rdx), %r8
  25604. movq 64(%rsi), %r10
  25605. pextq %rcx, %r8, %r8
  25606. movq %r9, 56(%rdi)
  25607. sbbq %r8, %r10
  25608. movq 72(%rdx), %r9
  25609. movq 72(%rsi), %r8
  25610. pextq %rcx, %r9, %r9
  25611. movq %r10, 64(%rdi)
  25612. sbbq %r9, %r8
  25613. movq 80(%rdx), %r10
  25614. movq 80(%rsi), %r9
  25615. pextq %rcx, %r10, %r10
  25616. movq %r8, 72(%rdi)
  25617. sbbq %r10, %r9
  25618. movq 88(%rdx), %r8
  25619. movq 88(%rsi), %r10
  25620. pextq %rcx, %r8, %r8
  25621. movq %r9, 80(%rdi)
  25622. sbbq %r8, %r10
  25623. movq 96(%rdx), %r9
  25624. movq 96(%rsi), %r8
  25625. pextq %rcx, %r9, %r9
  25626. movq %r10, 88(%rdi)
  25627. sbbq %r9, %r8
  25628. movq 104(%rdx), %r10
  25629. movq 104(%rsi), %r9
  25630. pextq %rcx, %r10, %r10
  25631. movq %r8, 96(%rdi)
  25632. sbbq %r10, %r9
  25633. movq 112(%rdx), %r8
  25634. movq 112(%rsi), %r10
  25635. pextq %rcx, %r8, %r8
  25636. movq %r9, 104(%rdi)
  25637. sbbq %r8, %r10
  25638. movq 120(%rdx), %r9
  25639. movq 120(%rsi), %r8
  25640. pextq %rcx, %r9, %r9
  25641. movq %r10, 112(%rdi)
  25642. sbbq %r9, %r8
  25643. movq 128(%rdx), %r10
  25644. movq 128(%rsi), %r9
  25645. pextq %rcx, %r10, %r10
  25646. movq %r8, 120(%rdi)
  25647. sbbq %r10, %r9
  25648. movq 136(%rdx), %r8
  25649. movq 136(%rsi), %r10
  25650. pextq %rcx, %r8, %r8
  25651. movq %r9, 128(%rdi)
  25652. sbbq %r8, %r10
  25653. movq 144(%rdx), %r9
  25654. movq 144(%rsi), %r8
  25655. pextq %rcx, %r9, %r9
  25656. movq %r10, 136(%rdi)
  25657. sbbq %r9, %r8
  25658. movq 152(%rdx), %r10
  25659. movq 152(%rsi), %r9
  25660. pextq %rcx, %r10, %r10
  25661. movq %r8, 144(%rdi)
  25662. sbbq %r10, %r9
  25663. movq 160(%rdx), %r8
  25664. movq 160(%rsi), %r10
  25665. pextq %rcx, %r8, %r8
  25666. movq %r9, 152(%rdi)
  25667. sbbq %r8, %r10
  25668. movq 168(%rdx), %r9
  25669. movq 168(%rsi), %r8
  25670. pextq %rcx, %r9, %r9
  25671. movq %r10, 160(%rdi)
  25672. sbbq %r9, %r8
  25673. movq 176(%rdx), %r10
  25674. movq 176(%rsi), %r9
  25675. pextq %rcx, %r10, %r10
  25676. movq %r8, 168(%rdi)
  25677. sbbq %r10, %r9
  25678. movq 184(%rdx), %r8
  25679. movq 184(%rsi), %r10
  25680. pextq %rcx, %r8, %r8
  25681. movq %r9, 176(%rdi)
  25682. sbbq %r8, %r10
  25683. movq 192(%rdx), %r9
  25684. movq 192(%rsi), %r8
  25685. pextq %rcx, %r9, %r9
  25686. movq %r10, 184(%rdi)
  25687. sbbq %r9, %r8
  25688. movq 200(%rdx), %r10
  25689. movq 200(%rsi), %r9
  25690. pextq %rcx, %r10, %r10
  25691. movq %r8, 192(%rdi)
  25692. sbbq %r10, %r9
  25693. movq 208(%rdx), %r8
  25694. movq 208(%rsi), %r10
  25695. pextq %rcx, %r8, %r8
  25696. movq %r9, 200(%rdi)
  25697. sbbq %r8, %r10
  25698. movq 216(%rdx), %r9
  25699. movq 216(%rsi), %r8
  25700. pextq %rcx, %r9, %r9
  25701. movq %r10, 208(%rdi)
  25702. sbbq %r9, %r8
  25703. movq 224(%rdx), %r10
  25704. movq 224(%rsi), %r9
  25705. pextq %rcx, %r10, %r10
  25706. movq %r8, 216(%rdi)
  25707. sbbq %r10, %r9
  25708. movq 232(%rdx), %r8
  25709. movq 232(%rsi), %r10
  25710. pextq %rcx, %r8, %r8
  25711. movq %r9, 224(%rdi)
  25712. sbbq %r8, %r10
  25713. movq 240(%rdx), %r9
  25714. movq 240(%rsi), %r8
  25715. pextq %rcx, %r9, %r9
  25716. movq %r10, 232(%rdi)
  25717. sbbq %r9, %r8
  25718. movq 248(%rdx), %r10
  25719. movq 248(%rsi), %r9
  25720. pextq %rcx, %r10, %r10
  25721. movq %r8, 240(%rdi)
  25722. sbbq %r10, %r9
  25723. movq 256(%rdx), %r8
  25724. movq 256(%rsi), %r10
  25725. pextq %rcx, %r8, %r8
  25726. movq %r9, 248(%rdi)
  25727. sbbq %r8, %r10
  25728. movq 264(%rdx), %r9
  25729. movq 264(%rsi), %r8
  25730. pextq %rcx, %r9, %r9
  25731. movq %r10, 256(%rdi)
  25732. sbbq %r9, %r8
  25733. movq 272(%rdx), %r10
  25734. movq 272(%rsi), %r9
  25735. pextq %rcx, %r10, %r10
  25736. movq %r8, 264(%rdi)
  25737. sbbq %r10, %r9
  25738. movq 280(%rdx), %r8
  25739. movq 280(%rsi), %r10
  25740. pextq %rcx, %r8, %r8
  25741. movq %r9, 272(%rdi)
  25742. sbbq %r8, %r10
  25743. movq 288(%rdx), %r9
  25744. movq 288(%rsi), %r8
  25745. pextq %rcx, %r9, %r9
  25746. movq %r10, 280(%rdi)
  25747. sbbq %r9, %r8
  25748. movq 296(%rdx), %r10
  25749. movq 296(%rsi), %r9
  25750. pextq %rcx, %r10, %r10
  25751. movq %r8, 288(%rdi)
  25752. sbbq %r10, %r9
  25753. movq 304(%rdx), %r8
  25754. movq 304(%rsi), %r10
  25755. pextq %rcx, %r8, %r8
  25756. movq %r9, 296(%rdi)
  25757. sbbq %r8, %r10
  25758. movq 312(%rdx), %r9
  25759. movq 312(%rsi), %r8
  25760. pextq %rcx, %r9, %r9
  25761. movq %r10, 304(%rdi)
  25762. sbbq %r9, %r8
  25763. movq 320(%rdx), %r10
  25764. movq 320(%rsi), %r9
  25765. pextq %rcx, %r10, %r10
  25766. movq %r8, 312(%rdi)
  25767. sbbq %r10, %r9
  25768. movq 328(%rdx), %r8
  25769. movq 328(%rsi), %r10
  25770. pextq %rcx, %r8, %r8
  25771. movq %r9, 320(%rdi)
  25772. sbbq %r8, %r10
  25773. movq 336(%rdx), %r9
  25774. movq 336(%rsi), %r8
  25775. pextq %rcx, %r9, %r9
  25776. movq %r10, 328(%rdi)
  25777. sbbq %r9, %r8
  25778. movq 344(%rdx), %r10
  25779. movq 344(%rsi), %r9
  25780. pextq %rcx, %r10, %r10
  25781. movq %r8, 336(%rdi)
  25782. sbbq %r10, %r9
  25783. movq 352(%rdx), %r8
  25784. movq 352(%rsi), %r10
  25785. pextq %rcx, %r8, %r8
  25786. movq %r9, 344(%rdi)
  25787. sbbq %r8, %r10
  25788. movq 360(%rdx), %r9
  25789. movq 360(%rsi), %r8
  25790. pextq %rcx, %r9, %r9
  25791. movq %r10, 352(%rdi)
  25792. sbbq %r9, %r8
  25793. movq 368(%rdx), %r10
  25794. movq 368(%rsi), %r9
  25795. pextq %rcx, %r10, %r10
  25796. movq %r8, 360(%rdi)
  25797. sbbq %r10, %r9
  25798. movq 376(%rdx), %r8
  25799. movq 376(%rsi), %r10
  25800. pextq %rcx, %r8, %r8
  25801. movq %r9, 368(%rdi)
  25802. sbbq %r8, %r10
  25803. movq %r10, 376(%rdi)
  /* rax = 0 - (borrow out of word 47): 0 for no borrow, -1 for a borrow. */
  25804. sbbq $0x00, %rax
  25805. repz retq
  25806. #ifndef __APPLE__
  25807. .size sp_3072_cond_sub_avx2_48,.-sp_3072_cond_sub_avx2_48
  25808. #endif /* __APPLE__ */
  25809. #ifdef HAVE_INTEL_AVX2
  25810. /* Mul a by digit b into r. (r = a * b)
  25811. *
  25812. * r A single precision integer.
  25813. * a A single precision integer.
  25814. * b A single precision digit.
  *
  * Register use (System V AMD64 argument order):
  *   %rdi = r  - 49 64-bit words are stored (offsets 0..384), so r needs
  *               room for one word more than a.
  *   %rsi = a  - 48 64-bit words are read (offsets 0..376).
  *   %rdx = b  - the 64-bit digit; it stays in %rdx as the implicit
  *               multiplicand of every mulxq.
  * Nothing is explicitly returned; %rax is left holding a copy of b.
  * Clobbers %rax, %rcx, %r8, %r9, %r10, %r11 and the flags; no stack is
  * used.
  *
  * Scheme: %r11 is held at zero. For each word, mulxq produces the low
  * half in %rcx and the high half in %r8. The low half is added into the
  * current accumulator with adcxq (carry in CF) and that accumulator is
  * stored; the high half is added into a freshly zeroed next accumulator
  * with adoxq (carry in OF). %r9 and %r10 alternate as current and next
  * accumulator. mulxq and movq do not modify the flags, so both carry
  * chains survive across the whole unrolled sequence.
  25815. */
  25816. #ifndef __APPLE__
  25817. .text
  25818. .globl sp_3072_mul_d_avx2_48
  25819. .type sp_3072_mul_d_avx2_48,@function
  25820. .align 16
  25821. sp_3072_mul_d_avx2_48:
  25822. #else
  25823. .section __TEXT,__text
  25824. .globl _sp_3072_mul_d_avx2_48
  25825. .p2align 4
  25826. _sp_3072_mul_d_avx2_48:
  25827. #endif /* __APPLE__ */
  /* b is copied to %rax and straight back to %rdx below, so %rdx still
   * holds b for the mulxq instructions. */
  25828. movq %rdx, %rax
  25829. # A[0] * B
  25830. movq %rax, %rdx
  /* Zero %r11 (the constant zero used below) and clear both CF and OF so
   * the adcxq and adoxq chains start with no carry. */
  25831. xorq %r11, %r11
  /* First product: low half goes straight to r[0], high half seeds %r10. */
  25832. mulxq (%rsi), %r9, %r10
  25833. movq %r9, (%rdi)
  25834. # A[1] * B
  25835. mulxq 8(%rsi), %rcx, %r8
  25836. movq %r11, %r9
  25837. adcxq %rcx, %r10
  25838. movq %r10, 8(%rdi)
  25839. adoxq %r8, %r9
  25840. # A[2] * B
  25841. mulxq 16(%rsi), %rcx, %r8
  25842. movq %r11, %r10
  25843. adcxq %rcx, %r9
  25844. movq %r9, 16(%rdi)
  25845. adoxq %r8, %r10
  25846. # A[3] * B
  25847. mulxq 24(%rsi), %rcx, %r8
  25848. movq %r11, %r9
  25849. adcxq %rcx, %r10
  25850. movq %r10, 24(%rdi)
  25851. adoxq %r8, %r9
  25852. # A[4] * B
  25853. mulxq 32(%rsi), %rcx, %r8
  25854. movq %r11, %r10
  25855. adcxq %rcx, %r9
  25856. movq %r9, 32(%rdi)
  25857. adoxq %r8, %r10
  25858. # A[5] * B
  25859. mulxq 40(%rsi), %rcx, %r8
  25860. movq %r11, %r9
  25861. adcxq %rcx, %r10
  25862. movq %r10, 40(%rdi)
  25863. adoxq %r8, %r9
  25864. # A[6] * B
  25865. mulxq 48(%rsi), %rcx, %r8
  25866. movq %r11, %r10
  25867. adcxq %rcx, %r9
  25868. movq %r9, 48(%rdi)
  25869. adoxq %r8, %r10
  25870. # A[7] * B
  25871. mulxq 56(%rsi), %rcx, %r8
  25872. movq %r11, %r9
  25873. adcxq %rcx, %r10
  25874. movq %r10, 56(%rdi)
  25875. adoxq %r8, %r9
  25876. # A[8] * B
  25877. mulxq 64(%rsi), %rcx, %r8
  25878. movq %r11, %r10
  25879. adcxq %rcx, %r9
  25880. movq %r9, 64(%rdi)
  25881. adoxq %r8, %r10
  25882. # A[9] * B
  25883. mulxq 72(%rsi), %rcx, %r8
  25884. movq %r11, %r9
  25885. adcxq %rcx, %r10
  25886. movq %r10, 72(%rdi)
  25887. adoxq %r8, %r9
  25888. # A[10] * B
  25889. mulxq 80(%rsi), %rcx, %r8
  25890. movq %r11, %r10
  25891. adcxq %rcx, %r9
  25892. movq %r9, 80(%rdi)
  25893. adoxq %r8, %r10
  25894. # A[11] * B
  25895. mulxq 88(%rsi), %rcx, %r8
  25896. movq %r11, %r9
  25897. adcxq %rcx, %r10
  25898. movq %r10, 88(%rdi)
  25899. adoxq %r8, %r9
  25900. # A[12] * B
  25901. mulxq 96(%rsi), %rcx, %r8
  25902. movq %r11, %r10
  25903. adcxq %rcx, %r9
  25904. movq %r9, 96(%rdi)
  25905. adoxq %r8, %r10
  25906. # A[13] * B
  25907. mulxq 104(%rsi), %rcx, %r8
  25908. movq %r11, %r9
  25909. adcxq %rcx, %r10
  25910. movq %r10, 104(%rdi)
  25911. adoxq %r8, %r9
  25912. # A[14] * B
  25913. mulxq 112(%rsi), %rcx, %r8
  25914. movq %r11, %r10
  25915. adcxq %rcx, %r9
  25916. movq %r9, 112(%rdi)
  25917. adoxq %r8, %r10
  25918. # A[15] * B
  25919. mulxq 120(%rsi), %rcx, %r8
  25920. movq %r11, %r9
  25921. adcxq %rcx, %r10
  25922. movq %r10, 120(%rdi)
  25923. adoxq %r8, %r9
  25924. # A[16] * B
  25925. mulxq 128(%rsi), %rcx, %r8
  25926. movq %r11, %r10
  25927. adcxq %rcx, %r9
  25928. movq %r9, 128(%rdi)
  25929. adoxq %r8, %r10
  25930. # A[17] * B
  25931. mulxq 136(%rsi), %rcx, %r8
  25932. movq %r11, %r9
  25933. adcxq %rcx, %r10
  25934. movq %r10, 136(%rdi)
  25935. adoxq %r8, %r9
  25936. # A[18] * B
  25937. mulxq 144(%rsi), %rcx, %r8
  25938. movq %r11, %r10
  25939. adcxq %rcx, %r9
  25940. movq %r9, 144(%rdi)
  25941. adoxq %r8, %r10
  25942. # A[19] * B
  25943. mulxq 152(%rsi), %rcx, %r8
  25944. movq %r11, %r9
  25945. adcxq %rcx, %r10
  25946. movq %r10, 152(%rdi)
  25947. adoxq %r8, %r9
  25948. # A[20] * B
  25949. mulxq 160(%rsi), %rcx, %r8
  25950. movq %r11, %r10
  25951. adcxq %rcx, %r9
  25952. movq %r9, 160(%rdi)
  25953. adoxq %r8, %r10
  25954. # A[21] * B
  25955. mulxq 168(%rsi), %rcx, %r8
  25956. movq %r11, %r9
  25957. adcxq %rcx, %r10
  25958. movq %r10, 168(%rdi)
  25959. adoxq %r8, %r9
  25960. # A[22] * B
  25961. mulxq 176(%rsi), %rcx, %r8
  25962. movq %r11, %r10
  25963. adcxq %rcx, %r9
  25964. movq %r9, 176(%rdi)
  25965. adoxq %r8, %r10
  25966. # A[23] * B
  25967. mulxq 184(%rsi), %rcx, %r8
  25968. movq %r11, %r9
  25969. adcxq %rcx, %r10
  25970. movq %r10, 184(%rdi)
  25971. adoxq %r8, %r9
  25972. # A[24] * B
  25973. mulxq 192(%rsi), %rcx, %r8
  25974. movq %r11, %r10
  25975. adcxq %rcx, %r9
  25976. movq %r9, 192(%rdi)
  25977. adoxq %r8, %r10
  25978. # A[25] * B
  25979. mulxq 200(%rsi), %rcx, %r8
  25980. movq %r11, %r9
  25981. adcxq %rcx, %r10
  25982. movq %r10, 200(%rdi)
  25983. adoxq %r8, %r9
  25984. # A[26] * B
  25985. mulxq 208(%rsi), %rcx, %r8
  25986. movq %r11, %r10
  25987. adcxq %rcx, %r9
  25988. movq %r9, 208(%rdi)
  25989. adoxq %r8, %r10
  25990. # A[27] * B
  25991. mulxq 216(%rsi), %rcx, %r8
  25992. movq %r11, %r9
  25993. adcxq %rcx, %r10
  25994. movq %r10, 216(%rdi)
  25995. adoxq %r8, %r9
  25996. # A[28] * B
  25997. mulxq 224(%rsi), %rcx, %r8
  25998. movq %r11, %r10
  25999. adcxq %rcx, %r9
  26000. movq %r9, 224(%rdi)
  26001. adoxq %r8, %r10
  26002. # A[29] * B
  26003. mulxq 232(%rsi), %rcx, %r8
  26004. movq %r11, %r9
  26005. adcxq %rcx, %r10
  26006. movq %r10, 232(%rdi)
  26007. adoxq %r8, %r9
  26008. # A[30] * B
  26009. mulxq 240(%rsi), %rcx, %r8
  26010. movq %r11, %r10
  26011. adcxq %rcx, %r9
  26012. movq %r9, 240(%rdi)
  26013. adoxq %r8, %r10
  26014. # A[31] * B
  26015. mulxq 248(%rsi), %rcx, %r8
  26016. movq %r11, %r9
  26017. adcxq %rcx, %r10
  26018. movq %r10, 248(%rdi)
  26019. adoxq %r8, %r9
  26020. # A[32] * B
  26021. mulxq 256(%rsi), %rcx, %r8
  26022. movq %r11, %r10
  26023. adcxq %rcx, %r9
  26024. movq %r9, 256(%rdi)
  26025. adoxq %r8, %r10
  26026. # A[33] * B
  26027. mulxq 264(%rsi), %rcx, %r8
  26028. movq %r11, %r9
  26029. adcxq %rcx, %r10
  26030. movq %r10, 264(%rdi)
  26031. adoxq %r8, %r9
  26032. # A[34] * B
  26033. mulxq 272(%rsi), %rcx, %r8
  26034. movq %r11, %r10
  26035. adcxq %rcx, %r9
  26036. movq %r9, 272(%rdi)
  26037. adoxq %r8, %r10
  26038. # A[35] * B
  26039. mulxq 280(%rsi), %rcx, %r8
  26040. movq %r11, %r9
  26041. adcxq %rcx, %r10
  26042. movq %r10, 280(%rdi)
  26043. adoxq %r8, %r9
  26044. # A[36] * B
  26045. mulxq 288(%rsi), %rcx, %r8
  26046. movq %r11, %r10
  26047. adcxq %rcx, %r9
  26048. movq %r9, 288(%rdi)
  26049. adoxq %r8, %r10
  26050. # A[37] * B
  26051. mulxq 296(%rsi), %rcx, %r8
  26052. movq %r11, %r9
  26053. adcxq %rcx, %r10
  26054. movq %r10, 296(%rdi)
  26055. adoxq %r8, %r9
  26056. # A[38] * B
  26057. mulxq 304(%rsi), %rcx, %r8
  26058. movq %r11, %r10
  26059. adcxq %rcx, %r9
  26060. movq %r9, 304(%rdi)
  26061. adoxq %r8, %r10
  26062. # A[39] * B
  26063. mulxq 312(%rsi), %rcx, %r8
  26064. movq %r11, %r9
  26065. adcxq %rcx, %r10
  26066. movq %r10, 312(%rdi)
  26067. adoxq %r8, %r9
  26068. # A[40] * B
  26069. mulxq 320(%rsi), %rcx, %r8
  26070. movq %r11, %r10
  26071. adcxq %rcx, %r9
  26072. movq %r9, 320(%rdi)
  26073. adoxq %r8, %r10
  26074. # A[41] * B
  26075. mulxq 328(%rsi), %rcx, %r8
  26076. movq %r11, %r9
  26077. adcxq %rcx, %r10
  26078. movq %r10, 328(%rdi)
  26079. adoxq %r8, %r9
  26080. # A[42] * B
  26081. mulxq 336(%rsi), %rcx, %r8
  26082. movq %r11, %r10
  26083. adcxq %rcx, %r9
  26084. movq %r9, 336(%rdi)
  26085. adoxq %r8, %r10
  26086. # A[43] * B
  26087. mulxq 344(%rsi), %rcx, %r8
  26088. movq %r11, %r9
  26089. adcxq %rcx, %r10
  26090. movq %r10, 344(%rdi)
  26091. adoxq %r8, %r9
  26092. # A[44] * B
  26093. mulxq 352(%rsi), %rcx, %r8
  26094. movq %r11, %r10
  26095. adcxq %rcx, %r9
  26096. movq %r9, 352(%rdi)
  26097. adoxq %r8, %r10
  26098. # A[45] * B
  26099. mulxq 360(%rsi), %rcx, %r8
  26100. movq %r11, %r9
  26101. adcxq %rcx, %r10
  26102. movq %r10, 360(%rdi)
  26103. adoxq %r8, %r9
  26104. # A[46] * B
  26105. mulxq 368(%rsi), %rcx, %r8
  26106. movq %r11, %r10
  26107. adcxq %rcx, %r9
  26108. movq %r9, 368(%rdi)
  26109. adoxq %r8, %r10
  26110. # A[47] * B
  26111. mulxq 376(%rsi), %rcx, %r8
  26112. movq %r11, %r9
  26113. adcxq %rcx, %r10
  26114. adoxq %r8, %r9
  /* Fold the last CF carry into the top word (%r11 is zero), then store
   * r[47] and the extra top word r[48]. */
  26115. adcxq %r11, %r9
  26116. movq %r10, 376(%rdi)
  26117. movq %r9, 384(%rdi)
  26118. repz retq
  26119. #ifndef __APPLE__
  26120. .size sp_3072_mul_d_avx2_48,.-sp_3072_mul_d_avx2_48
  26121. #endif /* __APPLE__ */
  26122. #endif /* HAVE_INTEL_AVX2 */
  26123. /* Compare a with b in constant time.
  26124. *
  26125. * a A single precision integer.
  26126. * b A single precision integer.
  26127. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  26128. * respectively.
  26129. */
  26130. #ifndef __APPLE__
  26131. .text
  26132. .globl sp_3072_cmp_48
  26133. .type sp_3072_cmp_48,@function
  26134. .align 16
  26135. sp_3072_cmp_48:
  26136. #else
  26137. .section __TEXT,__text
  26138. .globl _sp_3072_cmp_48
  26139. .p2align 4
  26140. _sp_3072_cmp_48:
  26141. #endif /* __APPLE__ */
  26142. xorq %rcx, %rcx
  26143. movq $-1, %rdx
  26144. movq $-1, %rax
  26145. movq $0x01, %r8
  26146. movq 376(%rdi), %r9
  26147. movq 376(%rsi), %r10
  26148. andq %rdx, %r9
  26149. andq %rdx, %r10
  26150. subq %r10, %r9
  26151. cmova %r8, %rax
  26152. cmovc %rdx, %rax
  26153. cmovnz %rcx, %rdx
  26154. movq 368(%rdi), %r9
  26155. movq 368(%rsi), %r10
  26156. andq %rdx, %r9
  26157. andq %rdx, %r10
  26158. subq %r10, %r9
  26159. cmova %r8, %rax
  26160. cmovc %rdx, %rax
  26161. cmovnz %rcx, %rdx
  26162. movq 360(%rdi), %r9
  26163. movq 360(%rsi), %r10
  26164. andq %rdx, %r9
  26165. andq %rdx, %r10
  26166. subq %r10, %r9
  26167. cmova %r8, %rax
  26168. cmovc %rdx, %rax
  26169. cmovnz %rcx, %rdx
  26170. movq 352(%rdi), %r9
  26171. movq 352(%rsi), %r10
  26172. andq %rdx, %r9
  26173. andq %rdx, %r10
  26174. subq %r10, %r9
  26175. cmova %r8, %rax
  26176. cmovc %rdx, %rax
  26177. cmovnz %rcx, %rdx
  26178. movq 344(%rdi), %r9
  26179. movq 344(%rsi), %r10
  26180. andq %rdx, %r9
  26181. andq %rdx, %r10
  26182. subq %r10, %r9
  26183. cmova %r8, %rax
  26184. cmovc %rdx, %rax
  26185. cmovnz %rcx, %rdx
  26186. movq 336(%rdi), %r9
  26187. movq 336(%rsi), %r10
  26188. andq %rdx, %r9
  26189. andq %rdx, %r10
  26190. subq %r10, %r9
  26191. cmova %r8, %rax
  26192. cmovc %rdx, %rax
  26193. cmovnz %rcx, %rdx
  26194. movq 328(%rdi), %r9
  26195. movq 328(%rsi), %r10
  26196. andq %rdx, %r9
  26197. andq %rdx, %r10
  26198. subq %r10, %r9
  26199. cmova %r8, %rax
  26200. cmovc %rdx, %rax
  26201. cmovnz %rcx, %rdx
  26202. movq 320(%rdi), %r9
  26203. movq 320(%rsi), %r10
  26204. andq %rdx, %r9
  26205. andq %rdx, %r10
  26206. subq %r10, %r9
  26207. cmova %r8, %rax
  26208. cmovc %rdx, %rax
  26209. cmovnz %rcx, %rdx
  26210. movq 312(%rdi), %r9
  26211. movq 312(%rsi), %r10
  26212. andq %rdx, %r9
  26213. andq %rdx, %r10
  26214. subq %r10, %r9
  26215. cmova %r8, %rax
  26216. cmovc %rdx, %rax
  26217. cmovnz %rcx, %rdx
  26218. movq 304(%rdi), %r9
  26219. movq 304(%rsi), %r10
  26220. andq %rdx, %r9
  26221. andq %rdx, %r10
  26222. subq %r10, %r9
  26223. cmova %r8, %rax
  26224. cmovc %rdx, %rax
  26225. cmovnz %rcx, %rdx
  26226. movq 296(%rdi), %r9
  26227. movq 296(%rsi), %r10
  26228. andq %rdx, %r9
  26229. andq %rdx, %r10
  26230. subq %r10, %r9
  26231. cmova %r8, %rax
  26232. cmovc %rdx, %rax
  26233. cmovnz %rcx, %rdx
  26234. movq 288(%rdi), %r9
  26235. movq 288(%rsi), %r10
  26236. andq %rdx, %r9
  26237. andq %rdx, %r10
  26238. subq %r10, %r9
  26239. cmova %r8, %rax
  26240. cmovc %rdx, %rax
  26241. cmovnz %rcx, %rdx
  26242. movq 280(%rdi), %r9
  26243. movq 280(%rsi), %r10
  26244. andq %rdx, %r9
  26245. andq %rdx, %r10
  26246. subq %r10, %r9
  26247. cmova %r8, %rax
  26248. cmovc %rdx, %rax
  26249. cmovnz %rcx, %rdx
  26250. movq 272(%rdi), %r9
  26251. movq 272(%rsi), %r10
  26252. andq %rdx, %r9
  26253. andq %rdx, %r10
  26254. subq %r10, %r9
  26255. cmova %r8, %rax
  26256. cmovc %rdx, %rax
  26257. cmovnz %rcx, %rdx
  26258. movq 264(%rdi), %r9
  26259. movq 264(%rsi), %r10
  26260. andq %rdx, %r9
  26261. andq %rdx, %r10
  26262. subq %r10, %r9
  26263. cmova %r8, %rax
  26264. cmovc %rdx, %rax
  26265. cmovnz %rcx, %rdx
  26266. movq 256(%rdi), %r9
  26267. movq 256(%rsi), %r10
  26268. andq %rdx, %r9
  26269. andq %rdx, %r10
  26270. subq %r10, %r9
  26271. cmova %r8, %rax
  26272. cmovc %rdx, %rax
  26273. cmovnz %rcx, %rdx
  26274. movq 248(%rdi), %r9
  26275. movq 248(%rsi), %r10
  26276. andq %rdx, %r9
  26277. andq %rdx, %r10
  26278. subq %r10, %r9
  26279. cmova %r8, %rax
  26280. cmovc %rdx, %rax
  26281. cmovnz %rcx, %rdx
  26282. movq 240(%rdi), %r9
  26283. movq 240(%rsi), %r10
  26284. andq %rdx, %r9
  26285. andq %rdx, %r10
  26286. subq %r10, %r9
  26287. cmova %r8, %rax
  26288. cmovc %rdx, %rax
  26289. cmovnz %rcx, %rdx
  26290. movq 232(%rdi), %r9
  26291. movq 232(%rsi), %r10
  26292. andq %rdx, %r9
  26293. andq %rdx, %r10
  26294. subq %r10, %r9
  26295. cmova %r8, %rax
  26296. cmovc %rdx, %rax
  26297. cmovnz %rcx, %rdx
  26298. movq 224(%rdi), %r9
  26299. movq 224(%rsi), %r10
  26300. andq %rdx, %r9
  26301. andq %rdx, %r10
  26302. subq %r10, %r9
  26303. cmova %r8, %rax
  26304. cmovc %rdx, %rax
  26305. cmovnz %rcx, %rdx
  26306. movq 216(%rdi), %r9
  26307. movq 216(%rsi), %r10
  26308. andq %rdx, %r9
  26309. andq %rdx, %r10
  26310. subq %r10, %r9
  26311. cmova %r8, %rax
  26312. cmovc %rdx, %rax
  26313. cmovnz %rcx, %rdx
  26314. movq 208(%rdi), %r9
  26315. movq 208(%rsi), %r10
  26316. andq %rdx, %r9
  26317. andq %rdx, %r10
  26318. subq %r10, %r9
  26319. cmova %r8, %rax
  26320. cmovc %rdx, %rax
  26321. cmovnz %rcx, %rdx
  26322. movq 200(%rdi), %r9
  26323. movq 200(%rsi), %r10
  26324. andq %rdx, %r9
  26325. andq %rdx, %r10
  26326. subq %r10, %r9
  26327. cmova %r8, %rax
  26328. cmovc %rdx, %rax
  26329. cmovnz %rcx, %rdx
  26330. movq 192(%rdi), %r9
  26331. movq 192(%rsi), %r10
  26332. andq %rdx, %r9
  26333. andq %rdx, %r10
  26334. subq %r10, %r9
  26335. cmova %r8, %rax
  26336. cmovc %rdx, %rax
  26337. cmovnz %rcx, %rdx
  26338. movq 184(%rdi), %r9
  26339. movq 184(%rsi), %r10
  26340. andq %rdx, %r9
  26341. andq %rdx, %r10
  26342. subq %r10, %r9
  26343. cmova %r8, %rax
  26344. cmovc %rdx, %rax
  26345. cmovnz %rcx, %rdx
  26346. movq 176(%rdi), %r9
  26347. movq 176(%rsi), %r10
  26348. andq %rdx, %r9
  26349. andq %rdx, %r10
  26350. subq %r10, %r9
  26351. cmova %r8, %rax
  26352. cmovc %rdx, %rax
  26353. cmovnz %rcx, %rdx
  26354. movq 168(%rdi), %r9
  26355. movq 168(%rsi), %r10
  26356. andq %rdx, %r9
  26357. andq %rdx, %r10
  26358. subq %r10, %r9
  26359. cmova %r8, %rax
  26360. cmovc %rdx, %rax
  26361. cmovnz %rcx, %rdx
  26362. movq 160(%rdi), %r9
  26363. movq 160(%rsi), %r10
  26364. andq %rdx, %r9
  26365. andq %rdx, %r10
  26366. subq %r10, %r9
  26367. cmova %r8, %rax
  26368. cmovc %rdx, %rax
  26369. cmovnz %rcx, %rdx
  26370. movq 152(%rdi), %r9
  26371. movq 152(%rsi), %r10
  26372. andq %rdx, %r9
  26373. andq %rdx, %r10
  26374. subq %r10, %r9
  26375. cmova %r8, %rax
  26376. cmovc %rdx, %rax
  26377. cmovnz %rcx, %rdx
  26378. movq 144(%rdi), %r9
  26379. movq 144(%rsi), %r10
  26380. andq %rdx, %r9
  26381. andq %rdx, %r10
  26382. subq %r10, %r9
  26383. cmova %r8, %rax
  26384. cmovc %rdx, %rax
  26385. cmovnz %rcx, %rdx
  26386. movq 136(%rdi), %r9
  26387. movq 136(%rsi), %r10
  26388. andq %rdx, %r9
  26389. andq %rdx, %r10
  26390. subq %r10, %r9
  26391. cmova %r8, %rax
  26392. cmovc %rdx, %rax
  26393. cmovnz %rcx, %rdx
  26394. movq 128(%rdi), %r9
  26395. movq 128(%rsi), %r10
  26396. andq %rdx, %r9
  26397. andq %rdx, %r10
  26398. subq %r10, %r9
  26399. cmova %r8, %rax
  26400. cmovc %rdx, %rax
  26401. cmovnz %rcx, %rdx
  26402. movq 120(%rdi), %r9
  26403. movq 120(%rsi), %r10
  26404. andq %rdx, %r9
  26405. andq %rdx, %r10
  26406. subq %r10, %r9
  26407. cmova %r8, %rax
  26408. cmovc %rdx, %rax
  26409. cmovnz %rcx, %rdx
  26410. movq 112(%rdi), %r9
  26411. movq 112(%rsi), %r10
  26412. andq %rdx, %r9
  26413. andq %rdx, %r10
  26414. subq %r10, %r9
  26415. cmova %r8, %rax
  26416. cmovc %rdx, %rax
  26417. cmovnz %rcx, %rdx
  26418. movq 104(%rdi), %r9
  26419. movq 104(%rsi), %r10
  26420. andq %rdx, %r9
  26421. andq %rdx, %r10
  26422. subq %r10, %r9
  26423. cmova %r8, %rax
  26424. cmovc %rdx, %rax
  26425. cmovnz %rcx, %rdx
  26426. movq 96(%rdi), %r9
  26427. movq 96(%rsi), %r10
  26428. andq %rdx, %r9
  26429. andq %rdx, %r10
  26430. subq %r10, %r9
  26431. cmova %r8, %rax
  26432. cmovc %rdx, %rax
  26433. cmovnz %rcx, %rdx
  26434. movq 88(%rdi), %r9
  26435. movq 88(%rsi), %r10
  26436. andq %rdx, %r9
  26437. andq %rdx, %r10
  26438. subq %r10, %r9
  26439. cmova %r8, %rax
  26440. cmovc %rdx, %rax
  26441. cmovnz %rcx, %rdx
  26442. movq 80(%rdi), %r9
  26443. movq 80(%rsi), %r10
  26444. andq %rdx, %r9
  26445. andq %rdx, %r10
  26446. subq %r10, %r9
  26447. cmova %r8, %rax
  26448. cmovc %rdx, %rax
  26449. cmovnz %rcx, %rdx
  26450. movq 72(%rdi), %r9
  26451. movq 72(%rsi), %r10
  26452. andq %rdx, %r9
  26453. andq %rdx, %r10
  26454. subq %r10, %r9
  26455. cmova %r8, %rax
  26456. cmovc %rdx, %rax
  26457. cmovnz %rcx, %rdx
  26458. movq 64(%rdi), %r9
  26459. movq 64(%rsi), %r10
  26460. andq %rdx, %r9
  26461. andq %rdx, %r10
  26462. subq %r10, %r9
  26463. cmova %r8, %rax
  26464. cmovc %rdx, %rax
  26465. cmovnz %rcx, %rdx
  26466. movq 56(%rdi), %r9
  26467. movq 56(%rsi), %r10
  26468. andq %rdx, %r9
  26469. andq %rdx, %r10
  26470. subq %r10, %r9
  26471. cmova %r8, %rax
  26472. cmovc %rdx, %rax
  26473. cmovnz %rcx, %rdx
  26474. movq 48(%rdi), %r9
  26475. movq 48(%rsi), %r10
  26476. andq %rdx, %r9
  26477. andq %rdx, %r10
  26478. subq %r10, %r9
  26479. cmova %r8, %rax
  26480. cmovc %rdx, %rax
  26481. cmovnz %rcx, %rdx
  26482. movq 40(%rdi), %r9
  26483. movq 40(%rsi), %r10
  26484. andq %rdx, %r9
  26485. andq %rdx, %r10
  26486. subq %r10, %r9
  26487. cmova %r8, %rax
  26488. cmovc %rdx, %rax
  26489. cmovnz %rcx, %rdx
  26490. movq 32(%rdi), %r9
  26491. movq 32(%rsi), %r10
  26492. andq %rdx, %r9
  26493. andq %rdx, %r10
  26494. subq %r10, %r9
  26495. cmova %r8, %rax
  26496. cmovc %rdx, %rax
  26497. cmovnz %rcx, %rdx
  26498. movq 24(%rdi), %r9
  26499. movq 24(%rsi), %r10
  26500. andq %rdx, %r9
  26501. andq %rdx, %r10
  26502. subq %r10, %r9
  26503. cmova %r8, %rax
  26504. cmovc %rdx, %rax
  26505. cmovnz %rcx, %rdx
  26506. movq 16(%rdi), %r9
  26507. movq 16(%rsi), %r10
  26508. andq %rdx, %r9
  26509. andq %rdx, %r10
  26510. subq %r10, %r9
  26511. cmova %r8, %rax
  26512. cmovc %rdx, %rax
  26513. cmovnz %rcx, %rdx
  26514. movq 8(%rdi), %r9
  26515. movq 8(%rsi), %r10
  26516. andq %rdx, %r9
  26517. andq %rdx, %r10
  26518. subq %r10, %r9
  26519. cmova %r8, %rax
  26520. cmovc %rdx, %rax
  26521. cmovnz %rcx, %rdx
  26522. movq (%rdi), %r9
  26523. movq (%rsi), %r10
  26524. andq %rdx, %r9
  26525. andq %rdx, %r10
  26526. subq %r10, %r9
  26527. cmova %r8, %rax
  26528. cmovc %rdx, %rax
  26529. cmovnz %rcx, %rdx
  26530. xorq %rdx, %rax
  26531. repz retq
  26532. #ifndef __APPLE__
  26533. .size sp_3072_cmp_48,.-sp_3072_cmp_48
  26534. #endif /* __APPLE__ */
  26535. /* Sub b from a into r. (r = a - b)
  26536. * Subtracts 48 64-bit words (byte offsets 0..376) with one borrow chain; returns in %rax 0 when no borrow leaves the top word and -1 (all ones) when one does.
  26537. * r A single precision integer. Result pointer, accessed through %rdi.
  26538. * a A single precision integer. Minuend pointer, accessed through %rsi.
  26539. * b A single precision integer. Subtrahend pointer, accessed through %rdx.
  26540. */
  26541. #ifndef __APPLE__
  26542. .text
  26543. .globl sp_3072_sub_48
  26544. .type sp_3072_sub_48,@function
  26545. .align 16
  26546. sp_3072_sub_48:
  26547. #else
  26548. .section __TEXT,__text
  26549. .globl _sp_3072_sub_48
  26550. .p2align 4
  26551. _sp_3072_sub_48:
  26552. #endif /* __APPLE__ */
  26553. movq (%rsi), %rcx /* rcx = a[0] */
  26554. xorq %rax, %rax /* rax = 0; xor writes the flags, so it is done before the borrow chain starts */
  26555. subq (%rdx), %rcx /* rcx = a[0] - b[0]; CF now holds the borrow */
  26556. movq 8(%rsi), %r8 /* r8 = a[1]; rcx and r8 alternate as the working register, mov leaves CF intact */
  26557. movq %rcx, (%rdi) /* r[0] stored only after a[0] and b[0] were read */
  26558. sbbq 8(%rdx), %r8 /* r8 = a[1] - b[1] - borrow; the same load/store/sbb pattern repeats up to word 47 */
  26559. movq 16(%rsi), %rcx
  26560. movq %r8, 8(%rdi)
  26561. sbbq 16(%rdx), %rcx
  26562. movq 24(%rsi), %r8
  26563. movq %rcx, 16(%rdi)
  26564. sbbq 24(%rdx), %r8
  26565. movq 32(%rsi), %rcx
  26566. movq %r8, 24(%rdi)
  26567. sbbq 32(%rdx), %rcx
  26568. movq 40(%rsi), %r8
  26569. movq %rcx, 32(%rdi)
  26570. sbbq 40(%rdx), %r8
  26571. movq 48(%rsi), %rcx
  26572. movq %r8, 40(%rdi)
  26573. sbbq 48(%rdx), %rcx
  26574. movq 56(%rsi), %r8
  26575. movq %rcx, 48(%rdi)
  26576. sbbq 56(%rdx), %r8
  26577. movq 64(%rsi), %rcx
  26578. movq %r8, 56(%rdi)
  26579. sbbq 64(%rdx), %rcx
  26580. movq 72(%rsi), %r8
  26581. movq %rcx, 64(%rdi)
  26582. sbbq 72(%rdx), %r8
  26583. movq 80(%rsi), %rcx
  26584. movq %r8, 72(%rdi)
  26585. sbbq 80(%rdx), %rcx
  26586. movq 88(%rsi), %r8
  26587. movq %rcx, 80(%rdi)
  26588. sbbq 88(%rdx), %r8
  26589. movq 96(%rsi), %rcx
  26590. movq %r8, 88(%rdi)
  26591. sbbq 96(%rdx), %rcx
  26592. movq 104(%rsi), %r8
  26593. movq %rcx, 96(%rdi)
  26594. sbbq 104(%rdx), %r8
  26595. movq 112(%rsi), %rcx
  26596. movq %r8, 104(%rdi)
  26597. sbbq 112(%rdx), %rcx
  26598. movq 120(%rsi), %r8
  26599. movq %rcx, 112(%rdi)
  26600. sbbq 120(%rdx), %r8
  26601. movq 128(%rsi), %rcx
  26602. movq %r8, 120(%rdi)
  26603. sbbq 128(%rdx), %rcx
  26604. movq 136(%rsi), %r8
  26605. movq %rcx, 128(%rdi)
  26606. sbbq 136(%rdx), %r8
  26607. movq 144(%rsi), %rcx
  26608. movq %r8, 136(%rdi)
  26609. sbbq 144(%rdx), %rcx
  26610. movq 152(%rsi), %r8
  26611. movq %rcx, 144(%rdi)
  26612. sbbq 152(%rdx), %r8
  26613. movq 160(%rsi), %rcx
  26614. movq %r8, 152(%rdi)
  26615. sbbq 160(%rdx), %rcx
  26616. movq 168(%rsi), %r8
  26617. movq %rcx, 160(%rdi)
  26618. sbbq 168(%rdx), %r8
  26619. movq 176(%rsi), %rcx
  26620. movq %r8, 168(%rdi)
  26621. sbbq 176(%rdx), %rcx
  26622. movq 184(%rsi), %r8
  26623. movq %rcx, 176(%rdi)
  26624. sbbq 184(%rdx), %r8
  26625. movq 192(%rsi), %rcx
  26626. movq %r8, 184(%rdi)
  26627. sbbq 192(%rdx), %rcx
  26628. movq 200(%rsi), %r8
  26629. movq %rcx, 192(%rdi)
  26630. sbbq 200(%rdx), %r8
  26631. movq 208(%rsi), %rcx
  26632. movq %r8, 200(%rdi)
  26633. sbbq 208(%rdx), %rcx
  26634. movq 216(%rsi), %r8
  26635. movq %rcx, 208(%rdi)
  26636. sbbq 216(%rdx), %r8
  26637. movq 224(%rsi), %rcx
  26638. movq %r8, 216(%rdi)
  26639. sbbq 224(%rdx), %rcx
  26640. movq 232(%rsi), %r8
  26641. movq %rcx, 224(%rdi)
  26642. sbbq 232(%rdx), %r8
  26643. movq 240(%rsi), %rcx
  26644. movq %r8, 232(%rdi)
  26645. sbbq 240(%rdx), %rcx
  26646. movq 248(%rsi), %r8
  26647. movq %rcx, 240(%rdi)
  26648. sbbq 248(%rdx), %r8
  26649. movq 256(%rsi), %rcx
  26650. movq %r8, 248(%rdi)
  26651. sbbq 256(%rdx), %rcx
  26652. movq 264(%rsi), %r8
  26653. movq %rcx, 256(%rdi)
  26654. sbbq 264(%rdx), %r8
  26655. movq 272(%rsi), %rcx
  26656. movq %r8, 264(%rdi)
  26657. sbbq 272(%rdx), %rcx
  26658. movq 280(%rsi), %r8
  26659. movq %rcx, 272(%rdi)
  26660. sbbq 280(%rdx), %r8
  26661. movq 288(%rsi), %rcx
  26662. movq %r8, 280(%rdi)
  26663. sbbq 288(%rdx), %rcx
  26664. movq 296(%rsi), %r8
  26665. movq %rcx, 288(%rdi)
  26666. sbbq 296(%rdx), %r8
  26667. movq 304(%rsi), %rcx
  26668. movq %r8, 296(%rdi)
  26669. sbbq 304(%rdx), %rcx
  26670. movq 312(%rsi), %r8
  26671. movq %rcx, 304(%rdi)
  26672. sbbq 312(%rdx), %r8
  26673. movq 320(%rsi), %rcx
  26674. movq %r8, 312(%rdi)
  26675. sbbq 320(%rdx), %rcx
  26676. movq 328(%rsi), %r8
  26677. movq %rcx, 320(%rdi)
  26678. sbbq 328(%rdx), %r8
  26679. movq 336(%rsi), %rcx
  26680. movq %r8, 328(%rdi)
  26681. sbbq 336(%rdx), %rcx
  26682. movq 344(%rsi), %r8
  26683. movq %rcx, 336(%rdi)
  26684. sbbq 344(%rdx), %r8
  26685. movq 352(%rsi), %rcx
  26686. movq %r8, 344(%rdi)
  26687. sbbq 352(%rdx), %rcx
  26688. movq 360(%rsi), %r8
  26689. movq %rcx, 352(%rdi)
  26690. sbbq 360(%rdx), %r8
  26691. movq 368(%rsi), %rcx
  26692. movq %r8, 360(%rdi)
  26693. sbbq 368(%rdx), %rcx
  26694. movq 376(%rsi), %r8
  26695. movq %rcx, 368(%rdi)
  26696. sbbq 376(%rdx), %r8 /* top word: a[47] - b[47] - borrow */
  26697. movq %r8, 376(%rdi)
  26698. sbbq $0x00, %rax /* rax = 0 - 0 - borrow: 0 or -1 is the return value */
  26699. repz retq
  26700. #ifndef __APPLE__
  26701. .size sp_3072_sub_48,.-sp_3072_sub_48
  26702. #endif /* __APPLE__ */
  26703. #ifdef HAVE_INTEL_AVX2
  26704. /* Reduce the number back to 3072 bits using Montgomery reduction.
  26705. * Runs 48 rounds: round i computes mu = a[i] * mp and adds m * mu into a[i..i+48] using mulx with two independent carry chains (adcx on CF, adox on OF). Afterwards a[48..95], minus m when the last round left a carry, is written to a[0..47]; m is selected with pext under a zero or all-ones mask.
  26706. * a A single precision number to reduce in place. Pointer in %rdi; words 0..95 are accessed.
  26707. * m The single precision number representing the modulus. Pointer in %rsi; 48 words are read.
  26708. * mp The digit representing the negative inverse of m mod 2^n. Passed in %rdx.
  26709. */
  26710. #ifndef __APPLE__
  26711. .text
  26712. .globl sp_3072_mont_reduce_avx2_48
  26713. .type sp_3072_mont_reduce_avx2_48,@function
  26714. .align 16
  26715. sp_3072_mont_reduce_avx2_48:
  26716. #else
  26717. .section __TEXT,__text
  26718. .globl _sp_3072_mont_reduce_avx2_48
  26719. .p2align 4
  26720. _sp_3072_mont_reduce_avx2_48:
  26721. #endif /* __APPLE__ */
  26722. pushq %r12 /* r12, r13 and r14 are used as scratch below; saved here, restored before return */
  26723. pushq %r13
  26724. pushq %r14
  26725. movq %rdx, %r8 /* r8 = mp; rdx itself is needed as the implicit multiplicand of mulx */
  26726. xorq %r14, %r14 /* r14 = carry brought forward from the previous round, initially 0 */
  26727. # i = 48
  26728. movq $48, %r9 /* r9 = rounds remaining */
  26729. movq (%rdi), %r13 /* r13 = a[0]; the lowest live word is carried in r13 from round to round */
  26730. addq $0xc0, %rdi /* bias the pointer by 192 bytes: inside the loop a[i+k] is at (8*k - 192)(%rdi) */
  26731. xorq %r12, %r12 /* r12 = 0 */
  26732. L_mont_loop_avx2_48:
  26733. # mu = a[i] * mp
  26734. movq %r13, %rdx /* rdx = a[i] */
  26735. movq %r13, %r10 /* r10 = a[i], accumulator for word 0 of this round */
  26736. imulq %r8, %rdx /* rdx = mu, the low 64 bits of a[i] * mp */
  26737. xorq %r12, %r12 /* r12 = 0; also clears CF and OF so both carry chains start empty */
  26738. # a[i+0] += m[0] * mu
  26739. mulxq (%rsi), %rax, %rcx /* rcx:rax = m[0] * mu (rax low half, rcx high half); mulx leaves the flags alone */
  26740. movq -184(%rdi), %r13 /* r13 = a[i+1] */
  26741. adcxq %rax, %r10 /* low half goes through the CF chain; this sum is never stored, only its carry is used */
  26742. adoxq %rcx, %r13 /* high half goes into the next word through the OF chain */
  26743. # a[i+1] += m[1] * mu
  26744. mulxq 8(%rsi), %rax, %rcx
  26745. movq -176(%rdi), %r10
  26746. adcxq %rax, %r13 /* finished a[i+1] stays in r13 (not stored); it is the a[i] of the next round */
  26747. adoxq %rcx, %r10
  26748. # a[i+2] += m[2] * mu
  26749. mulxq 16(%rsi), %rax, %rcx
  26750. movq -168(%rdi), %r11 /* from here on r10 and r11 alternate as the two live accumulator words */
  26751. adcxq %rax, %r10
  26752. adoxq %rcx, %r11
  26753. movq %r10, -176(%rdi) /* a[i+2] is complete once both chains have passed it; store it */
  26754. # a[i+3] += m[3] * mu
  26755. mulxq 24(%rsi), %rax, %rcx
  26756. movq -160(%rdi), %r10
  26757. adcxq %rax, %r11
  26758. adoxq %rcx, %r10
  26759. movq %r11, -168(%rdi)
  26760. # a[i+4] += m[4] * mu
  26761. mulxq 32(%rsi), %rax, %rcx
  26762. movq -152(%rdi), %r11
  26763. adcxq %rax, %r10
  26764. adoxq %rcx, %r11
  26765. movq %r10, -160(%rdi)
  26766. # a[i+5] += m[5] * mu
  26767. mulxq 40(%rsi), %rax, %rcx
  26768. movq -144(%rdi), %r10
  26769. adcxq %rax, %r11
  26770. adoxq %rcx, %r10
  26771. movq %r11, -152(%rdi)
  26772. # a[i+6] += m[6] * mu
  26773. mulxq 48(%rsi), %rax, %rcx
  26774. movq -136(%rdi), %r11
  26775. adcxq %rax, %r10
  26776. adoxq %rcx, %r11
  26777. movq %r10, -144(%rdi)
  26778. # a[i+7] += m[7] * mu
  26779. mulxq 56(%rsi), %rax, %rcx
  26780. movq -128(%rdi), %r10
  26781. adcxq %rax, %r11
  26782. adoxq %rcx, %r10
  26783. movq %r11, -136(%rdi)
  26784. # a[i+8] += m[8] * mu
  26785. mulxq 64(%rsi), %rax, %rcx
  26786. movq -120(%rdi), %r11
  26787. adcxq %rax, %r10
  26788. adoxq %rcx, %r11
  26789. movq %r10, -128(%rdi)
  26790. # a[i+9] += m[9] * mu
  26791. mulxq 72(%rsi), %rax, %rcx
  26792. movq -112(%rdi), %r10
  26793. adcxq %rax, %r11
  26794. adoxq %rcx, %r10
  26795. movq %r11, -120(%rdi)
  26796. # a[i+10] += m[10] * mu
  26797. mulxq 80(%rsi), %rax, %rcx
  26798. movq -104(%rdi), %r11
  26799. adcxq %rax, %r10
  26800. adoxq %rcx, %r11
  26801. movq %r10, -112(%rdi)
  26802. # a[i+11] += m[11] * mu
  26803. mulxq 88(%rsi), %rax, %rcx
  26804. movq -96(%rdi), %r10
  26805. adcxq %rax, %r11
  26806. adoxq %rcx, %r10
  26807. movq %r11, -104(%rdi)
  26808. # a[i+12] += m[12] * mu
  26809. mulxq 96(%rsi), %rax, %rcx
  26810. movq -88(%rdi), %r11
  26811. adcxq %rax, %r10
  26812. adoxq %rcx, %r11
  26813. movq %r10, -96(%rdi)
  26814. # a[i+13] += m[13] * mu
  26815. mulxq 104(%rsi), %rax, %rcx
  26816. movq -80(%rdi), %r10
  26817. adcxq %rax, %r11
  26818. adoxq %rcx, %r10
  26819. movq %r11, -88(%rdi)
  26820. # a[i+14] += m[14] * mu
  26821. mulxq 112(%rsi), %rax, %rcx
  26822. movq -72(%rdi), %r11
  26823. adcxq %rax, %r10
  26824. adoxq %rcx, %r11
  26825. movq %r10, -80(%rdi)
  26826. # a[i+15] += m[15] * mu
  26827. mulxq 120(%rsi), %rax, %rcx
  26828. movq -64(%rdi), %r10
  26829. adcxq %rax, %r11
  26830. adoxq %rcx, %r10
  26831. movq %r11, -72(%rdi)
  26832. # a[i+16] += m[16] * mu
  26833. mulxq 128(%rsi), %rax, %rcx
  26834. movq -56(%rdi), %r11
  26835. adcxq %rax, %r10
  26836. adoxq %rcx, %r11
  26837. movq %r10, -64(%rdi)
  26838. # a[i+17] += m[17] * mu
  26839. mulxq 136(%rsi), %rax, %rcx
  26840. movq -48(%rdi), %r10
  26841. adcxq %rax, %r11
  26842. adoxq %rcx, %r10
  26843. movq %r11, -56(%rdi)
  26844. # a[i+18] += m[18] * mu
  26845. mulxq 144(%rsi), %rax, %rcx
  26846. movq -40(%rdi), %r11
  26847. adcxq %rax, %r10
  26848. adoxq %rcx, %r11
  26849. movq %r10, -48(%rdi)
  26850. # a[i+19] += m[19] * mu
  26851. mulxq 152(%rsi), %rax, %rcx
  26852. movq -32(%rdi), %r10
  26853. adcxq %rax, %r11
  26854. adoxq %rcx, %r10
  26855. movq %r11, -40(%rdi)
  26856. # a[i+20] += m[20] * mu
  26857. mulxq 160(%rsi), %rax, %rcx
  26858. movq -24(%rdi), %r11
  26859. adcxq %rax, %r10
  26860. adoxq %rcx, %r11
  26861. movq %r10, -32(%rdi)
  26862. # a[i+21] += m[21] * mu
  26863. mulxq 168(%rsi), %rax, %rcx
  26864. movq -16(%rdi), %r10
  26865. adcxq %rax, %r11
  26866. adoxq %rcx, %r10
  26867. movq %r11, -24(%rdi)
  26868. # a[i+22] += m[22] * mu
  26869. mulxq 176(%rsi), %rax, %rcx
  26870. movq -8(%rdi), %r11
  26871. adcxq %rax, %r10
  26872. adoxq %rcx, %r11
  26873. movq %r10, -16(%rdi)
  26874. # a[i+23] += m[23] * mu
  26875. mulxq 184(%rsi), %rax, %rcx
  26876. movq (%rdi), %r10
  26877. adcxq %rax, %r11
  26878. adoxq %rcx, %r10
  26879. movq %r11, -8(%rdi)
  26880. # a[i+24] += m[24] * mu
  26881. mulxq 192(%rsi), %rax, %rcx
  26882. movq 8(%rdi), %r11
  26883. adcxq %rax, %r10
  26884. adoxq %rcx, %r11
  26885. movq %r10, (%rdi)
  26886. # a[i+25] += m[25] * mu
  26887. mulxq 200(%rsi), %rax, %rcx
  26888. movq 16(%rdi), %r10
  26889. adcxq %rax, %r11
  26890. adoxq %rcx, %r10
  26891. movq %r11, 8(%rdi)
  26892. # a[i+26] += m[26] * mu
  26893. mulxq 208(%rsi), %rax, %rcx
  26894. movq 24(%rdi), %r11
  26895. adcxq %rax, %r10
  26896. adoxq %rcx, %r11
  26897. movq %r10, 16(%rdi)
  26898. # a[i+27] += m[27] * mu
  26899. mulxq 216(%rsi), %rax, %rcx
  26900. movq 32(%rdi), %r10
  26901. adcxq %rax, %r11
  26902. adoxq %rcx, %r10
  26903. movq %r11, 24(%rdi)
  26904. # a[i+28] += m[28] * mu
  26905. mulxq 224(%rsi), %rax, %rcx
  26906. movq 40(%rdi), %r11
  26907. adcxq %rax, %r10
  26908. adoxq %rcx, %r11
  26909. movq %r10, 32(%rdi)
  26910. # a[i+29] += m[29] * mu
  26911. mulxq 232(%rsi), %rax, %rcx
  26912. movq 48(%rdi), %r10
  26913. adcxq %rax, %r11
  26914. adoxq %rcx, %r10
  26915. movq %r11, 40(%rdi)
  26916. # a[i+30] += m[30] * mu
  26917. mulxq 240(%rsi), %rax, %rcx
  26918. movq 56(%rdi), %r11
  26919. adcxq %rax, %r10
  26920. adoxq %rcx, %r11
  26921. movq %r10, 48(%rdi)
  26922. # a[i+31] += m[31] * mu
  26923. mulxq 248(%rsi), %rax, %rcx
  26924. movq 64(%rdi), %r10
  26925. adcxq %rax, %r11
  26926. adoxq %rcx, %r10
  26927. movq %r11, 56(%rdi)
  26928. # a[i+32] += m[32] * mu
  26929. mulxq 256(%rsi), %rax, %rcx
  26930. movq 72(%rdi), %r11
  26931. adcxq %rax, %r10
  26932. adoxq %rcx, %r11
  26933. movq %r10, 64(%rdi)
  26934. # a[i+33] += m[33] * mu
  26935. mulxq 264(%rsi), %rax, %rcx
  26936. movq 80(%rdi), %r10
  26937. adcxq %rax, %r11
  26938. adoxq %rcx, %r10
  26939. movq %r11, 72(%rdi)
  26940. # a[i+34] += m[34] * mu
  26941. mulxq 272(%rsi), %rax, %rcx
  26942. movq 88(%rdi), %r11
  26943. adcxq %rax, %r10
  26944. adoxq %rcx, %r11
  26945. movq %r10, 80(%rdi)
  26946. # a[i+35] += m[35] * mu
  26947. mulxq 280(%rsi), %rax, %rcx
  26948. movq 96(%rdi), %r10
  26949. adcxq %rax, %r11
  26950. adoxq %rcx, %r10
  26951. movq %r11, 88(%rdi)
  26952. # a[i+36] += m[36] * mu
  26953. mulxq 288(%rsi), %rax, %rcx
  26954. movq 104(%rdi), %r11
  26955. adcxq %rax, %r10
  26956. adoxq %rcx, %r11
  26957. movq %r10, 96(%rdi)
  26958. # a[i+37] += m[37] * mu
  26959. mulxq 296(%rsi), %rax, %rcx
  26960. movq 112(%rdi), %r10
  26961. adcxq %rax, %r11
  26962. adoxq %rcx, %r10
  26963. movq %r11, 104(%rdi)
  26964. # a[i+38] += m[38] * mu
  26965. mulxq 304(%rsi), %rax, %rcx
  26966. movq 120(%rdi), %r11
  26967. adcxq %rax, %r10
  26968. adoxq %rcx, %r11
  26969. movq %r10, 112(%rdi)
  26970. # a[i+39] += m[39] * mu
  26971. mulxq 312(%rsi), %rax, %rcx
  26972. movq 128(%rdi), %r10
  26973. adcxq %rax, %r11
  26974. adoxq %rcx, %r10
  26975. movq %r11, 120(%rdi)
  26976. # a[i+40] += m[40] * mu
  26977. mulxq 320(%rsi), %rax, %rcx
  26978. movq 136(%rdi), %r11
  26979. adcxq %rax, %r10
  26980. adoxq %rcx, %r11
  26981. movq %r10, 128(%rdi)
  26982. # a[i+41] += m[41] * mu
  26983. mulxq 328(%rsi), %rax, %rcx
  26984. movq 144(%rdi), %r10
  26985. adcxq %rax, %r11
  26986. adoxq %rcx, %r10
  26987. movq %r11, 136(%rdi)
  26988. # a[i+42] += m[42] * mu
  26989. mulxq 336(%rsi), %rax, %rcx
  26990. movq 152(%rdi), %r11
  26991. adcxq %rax, %r10
  26992. adoxq %rcx, %r11
  26993. movq %r10, 144(%rdi)
  26994. # a[i+43] += m[43] * mu
  26995. mulxq 344(%rsi), %rax, %rcx
  26996. movq 160(%rdi), %r10
  26997. adcxq %rax, %r11
  26998. adoxq %rcx, %r10
  26999. movq %r11, 152(%rdi)
  27000. # a[i+44] += m[44] * mu
  27001. mulxq 352(%rsi), %rax, %rcx
  27002. movq 168(%rdi), %r11
  27003. adcxq %rax, %r10
  27004. adoxq %rcx, %r11
  27005. movq %r10, 160(%rdi)
  27006. # a[i+45] += m[45] * mu
  27007. mulxq 360(%rsi), %rax, %rcx
  27008. movq 176(%rdi), %r10
  27009. adcxq %rax, %r11
  27010. adoxq %rcx, %r10
  27011. movq %r11, 168(%rdi)
  27012. # a[i+46] += m[46] * mu
  27013. mulxq 368(%rsi), %rax, %rcx
  27014. movq 184(%rdi), %r11
  27015. adcxq %rax, %r10
  27016. adoxq %rcx, %r11
  27017. movq %r10, 176(%rdi)
  27018. # a[i+47] += m[47] * mu
  27019. mulxq 376(%rsi), %rax, %rcx
  27020. movq 192(%rdi), %r10 /* r10 = a[i+48], the word just above this round */
  27021. adcxq %rax, %r11
  27022. adoxq %rcx, %r10
  27023. movq %r11, 184(%rdi)
  27024. adcxq %r14, %r10 /* a[i+48] += carry saved from the previous round + CF */
  27025. movq %r10, 192(%rdi)
  27026. movq %r12, %r14 /* r14 = 0 (r12 is still zero) */
  27027. adoxq %r12, %r14 /* r14 += OF */
  27028. adcxq %r12, %r14 /* r14 += CF; r14 now holds the carry out of this round */
  27029. # a += 1
  27030. addq $8, %rdi /* flags may be overwritten from here on, the carry is already captured in r14 */
  27031. # i -= 1
  27032. subq $0x01, %r9
  27033. jnz L_mont_loop_avx2_48
  27034. subq $0xc0, %rdi /* undo the 192-byte bias: rdi is now the address of a[48] */
  27035. negq %r14 /* final carry becomes a mask: 0 stays 0, 1 becomes all ones (r14 is expected to be 0 or 1 here) */
  27036. movq %rdi, %r8 /* r8 = address of a[48], source of the upper half */
  27037. subq $0x180, %rdi /* rdi = address of a[0], destination of the result */
  27038. movq (%rsi), %rcx
  27039. movq %r13, %rdx /* a[48] was never written back to memory in the loop; its value is in r13 */
  27040. pextq %r14, %rcx, %rcx /* rcx = m[0] when r14 is all ones, 0 when r14 is zero */
  27041. subq %rcx, %rdx /* starts the borrow chain; only mov, pext and sbb follow, none of which disturb CF before the next sbb */
  27042. movq 8(%rsi), %rcx
  27043. movq 8(%r8), %rax
  27044. pextq %r14, %rcx, %rcx
  27045. movq %rdx, (%rdi)
  27046. sbbq %rcx, %rax /* rax, rcx and rdx rotate through the roles of value, masked modulus word and pending store */
  27047. movq 16(%rsi), %rdx
  27048. movq 16(%r8), %rcx
  27049. pextq %r14, %rdx, %rdx
  27050. movq %rax, 8(%rdi)
  27051. sbbq %rdx, %rcx
  27052. movq 24(%rsi), %rax
  27053. movq 24(%r8), %rdx
  27054. pextq %r14, %rax, %rax
  27055. movq %rcx, 16(%rdi)
  27056. sbbq %rax, %rdx
  27057. movq 32(%rsi), %rcx
  27058. movq 32(%r8), %rax
  27059. pextq %r14, %rcx, %rcx
  27060. movq %rdx, 24(%rdi)
  27061. sbbq %rcx, %rax
  27062. movq 40(%rsi), %rdx
  27063. movq 40(%r8), %rcx
  27064. pextq %r14, %rdx, %rdx
  27065. movq %rax, 32(%rdi)
  27066. sbbq %rdx, %rcx
  27067. movq 48(%rsi), %rax
  27068. movq 48(%r8), %rdx
  27069. pextq %r14, %rax, %rax
  27070. movq %rcx, 40(%rdi)
  27071. sbbq %rax, %rdx
  27072. movq 56(%rsi), %rcx
  27073. movq 56(%r8), %rax
  27074. pextq %r14, %rcx, %rcx
  27075. movq %rdx, 48(%rdi)
  27076. sbbq %rcx, %rax
  27077. movq 64(%rsi), %rdx
  27078. movq 64(%r8), %rcx
  27079. pextq %r14, %rdx, %rdx
  27080. movq %rax, 56(%rdi)
  27081. sbbq %rdx, %rcx
  27082. movq 72(%rsi), %rax
  27083. movq 72(%r8), %rdx
  27084. pextq %r14, %rax, %rax
  27085. movq %rcx, 64(%rdi)
  27086. sbbq %rax, %rdx
  27087. movq 80(%rsi), %rcx
  27088. movq 80(%r8), %rax
  27089. pextq %r14, %rcx, %rcx
  27090. movq %rdx, 72(%rdi)
  27091. sbbq %rcx, %rax
  27092. movq 88(%rsi), %rdx
  27093. movq 88(%r8), %rcx
  27094. pextq %r14, %rdx, %rdx
  27095. movq %rax, 80(%rdi)
  27096. sbbq %rdx, %rcx
  27097. movq 96(%rsi), %rax
  27098. movq 96(%r8), %rdx
  27099. pextq %r14, %rax, %rax
  27100. movq %rcx, 88(%rdi)
  27101. sbbq %rax, %rdx
  27102. movq 104(%rsi), %rcx
  27103. movq 104(%r8), %rax
  27104. pextq %r14, %rcx, %rcx
  27105. movq %rdx, 96(%rdi)
  27106. sbbq %rcx, %rax
  27107. movq 112(%rsi), %rdx
  27108. movq 112(%r8), %rcx
  27109. pextq %r14, %rdx, %rdx
  27110. movq %rax, 104(%rdi)
  27111. sbbq %rdx, %rcx
  27112. movq 120(%rsi), %rax
  27113. movq 120(%r8), %rdx
  27114. pextq %r14, %rax, %rax
  27115. movq %rcx, 112(%rdi)
  27116. sbbq %rax, %rdx
  27117. movq 128(%rsi), %rcx
  27118. movq 128(%r8), %rax
  27119. pextq %r14, %rcx, %rcx
  27120. movq %rdx, 120(%rdi)
  27121. sbbq %rcx, %rax
  27122. movq 136(%rsi), %rdx
  27123. movq 136(%r8), %rcx
  27124. pextq %r14, %rdx, %rdx
  27125. movq %rax, 128(%rdi)
  27126. sbbq %rdx, %rcx
  27127. movq 144(%rsi), %rax
  27128. movq 144(%r8), %rdx
  27129. pextq %r14, %rax, %rax
  27130. movq %rcx, 136(%rdi)
  27131. sbbq %rax, %rdx
  27132. movq 152(%rsi), %rcx
  27133. movq 152(%r8), %rax
  27134. pextq %r14, %rcx, %rcx
  27135. movq %rdx, 144(%rdi)
  27136. sbbq %rcx, %rax
  27137. movq 160(%rsi), %rdx
  27138. movq 160(%r8), %rcx
  27139. pextq %r14, %rdx, %rdx
  27140. movq %rax, 152(%rdi)
  27141. sbbq %rdx, %rcx
  27142. movq 168(%rsi), %rax
  27143. movq 168(%r8), %rdx
  27144. pextq %r14, %rax, %rax
  27145. movq %rcx, 160(%rdi)
  27146. sbbq %rax, %rdx
  27147. movq 176(%rsi), %rcx
  27148. movq 176(%r8), %rax
  27149. pextq %r14, %rcx, %rcx
  27150. movq %rdx, 168(%rdi)
  27151. sbbq %rcx, %rax
  27152. movq 184(%rsi), %rdx
  27153. movq 184(%r8), %rcx
  27154. pextq %r14, %rdx, %rdx
  27155. movq %rax, 176(%rdi)
  27156. sbbq %rdx, %rcx
  27157. movq 192(%rsi), %rax
  27158. movq 192(%r8), %rdx
  27159. pextq %r14, %rax, %rax
  27160. movq %rcx, 184(%rdi)
  27161. sbbq %rax, %rdx
  27162. movq 200(%rsi), %rcx
  27163. movq 200(%r8), %rax
  27164. pextq %r14, %rcx, %rcx
  27165. movq %rdx, 192(%rdi)
  27166. sbbq %rcx, %rax
  27167. movq 208(%rsi), %rdx
  27168. movq 208(%r8), %rcx
  27169. pextq %r14, %rdx, %rdx
  27170. movq %rax, 200(%rdi)
  27171. sbbq %rdx, %rcx
  27172. movq 216(%rsi), %rax
  27173. movq 216(%r8), %rdx
  27174. pextq %r14, %rax, %rax
  27175. movq %rcx, 208(%rdi)
  27176. sbbq %rax, %rdx
  27177. movq 224(%rsi), %rcx
  27178. movq 224(%r8), %rax
  27179. pextq %r14, %rcx, %rcx
  27180. movq %rdx, 216(%rdi)
  27181. sbbq %rcx, %rax
  27182. movq 232(%rsi), %rdx
  27183. movq 232(%r8), %rcx
  27184. pextq %r14, %rdx, %rdx
  27185. movq %rax, 224(%rdi)
  27186. sbbq %rdx, %rcx
  27187. movq 240(%rsi), %rax
  27188. movq 240(%r8), %rdx
  27189. pextq %r14, %rax, %rax
  27190. movq %rcx, 232(%rdi)
  27191. sbbq %rax, %rdx
  27192. movq 248(%rsi), %rcx
  27193. movq 248(%r8), %rax
  27194. pextq %r14, %rcx, %rcx
  27195. movq %rdx, 240(%rdi)
  27196. sbbq %rcx, %rax
  27197. movq 256(%rsi), %rdx
  27198. movq 256(%r8), %rcx
  27199. pextq %r14, %rdx, %rdx
  27200. movq %rax, 248(%rdi)
  27201. sbbq %rdx, %rcx
  27202. movq 264(%rsi), %rax
  27203. movq 264(%r8), %rdx
  27204. pextq %r14, %rax, %rax
  27205. movq %rcx, 256(%rdi)
  27206. sbbq %rax, %rdx
  27207. movq 272(%rsi), %rcx
  27208. movq 272(%r8), %rax
  27209. pextq %r14, %rcx, %rcx
  27210. movq %rdx, 264(%rdi)
  27211. sbbq %rcx, %rax
  27212. movq 280(%rsi), %rdx
  27213. movq 280(%r8), %rcx
  27214. pextq %r14, %rdx, %rdx
  27215. movq %rax, 272(%rdi)
  27216. sbbq %rdx, %rcx
  27217. movq 288(%rsi), %rax
  27218. movq 288(%r8), %rdx
  27219. pextq %r14, %rax, %rax
  27220. movq %rcx, 280(%rdi)
  27221. sbbq %rax, %rdx
  27222. movq 296(%rsi), %rcx
  27223. movq 296(%r8), %rax
  27224. pextq %r14, %rcx, %rcx
  27225. movq %rdx, 288(%rdi)
  27226. sbbq %rcx, %rax
  27227. movq 304(%rsi), %rdx
  27228. movq 304(%r8), %rcx
  27229. pextq %r14, %rdx, %rdx
  27230. movq %rax, 296(%rdi)
  27231. sbbq %rdx, %rcx
  27232. movq 312(%rsi), %rax
  27233. movq 312(%r8), %rdx
  27234. pextq %r14, %rax, %rax
  27235. movq %rcx, 304(%rdi)
  27236. sbbq %rax, %rdx
  27237. movq 320(%rsi), %rcx
  27238. movq 320(%r8), %rax
  27239. pextq %r14, %rcx, %rcx
  27240. movq %rdx, 312(%rdi)
  27241. sbbq %rcx, %rax
  27242. movq 328(%rsi), %rdx
  27243. movq 328(%r8), %rcx
  27244. pextq %r14, %rdx, %rdx
  27245. movq %rax, 320(%rdi)
  27246. sbbq %rdx, %rcx
  27247. movq 336(%rsi), %rax
  27248. movq 336(%r8), %rdx
  27249. pextq %r14, %rax, %rax
  27250. movq %rcx, 328(%rdi)
  27251. sbbq %rax, %rdx
  27252. movq 344(%rsi), %rcx
  27253. movq 344(%r8), %rax
  27254. pextq %r14, %rcx, %rcx
  27255. movq %rdx, 336(%rdi)
  27256. sbbq %rcx, %rax
  27257. movq 352(%rsi), %rdx
  27258. movq 352(%r8), %rcx
  27259. pextq %r14, %rdx, %rdx
  27260. movq %rax, 344(%rdi)
  27261. sbbq %rdx, %rcx
  27262. movq 360(%rsi), %rax
  27263. movq 360(%r8), %rdx
  27264. pextq %r14, %rax, %rax
  27265. movq %rcx, 352(%rdi)
  27266. sbbq %rax, %rdx
  27267. movq 368(%rsi), %rcx
  27268. movq 368(%r8), %rax
  27269. pextq %r14, %rcx, %rcx
  27270. movq %rdx, 360(%rdi)
  27271. sbbq %rcx, %rax
  27272. movq 376(%rsi), %rdx
  27273. movq 376(%r8), %rcx
  27274. pextq %r14, %rdx, %rdx
  27275. movq %rax, 368(%rdi)
  27276. sbbq %rdx, %rcx
  27277. movq %rcx, 376(%rdi) /* top word stored; the borrow out of the chain is not used */
  27278. popq %r14 /* restore in reverse order of the pushes */
  27279. popq %r13
  27280. popq %r12
  27281. repz retq
  27282. #ifndef __APPLE__
  27283. .size sp_3072_mont_reduce_avx2_48,.-sp_3072_mont_reduce_avx2_48
  27284. #endif /* __APPLE__ */
  27285. #endif /* HAVE_INTEL_AVX2 */
  27286. /* Conditionally add a and b using the mask m.
  27287. * m is -1 to add and 0 when not.
  27288. * Computes r = a + (b & m) over 24 64-bit words and returns the carry out of the top word (0 or 1) in %rax. All 24 words of b are masked into a 192-byte stack buffer first, so that no and instruction (which rewrites the flags) sits inside the add-with-carry chain. NOTE(review): the masked copy is left in the released stack area on return - confirm whether that matters for secret operands.
  27289. * r A single precision number representing conditional add result. Pointer in %rdi.
  27290. * a A single precision number to add with. Pointer in %rsi.
  27291. * b A single precision number to add. Pointer in %rdx.
  27292. * m Mask value to apply. Passed in %rcx.
  27293. */
  27294. #ifndef __APPLE__
  27295. .text
  27296. .globl sp_3072_cond_add_24
  27297. .type sp_3072_cond_add_24,@function
  27298. .align 16
  27299. sp_3072_cond_add_24:
  27300. #else
  27301. .section __TEXT,__text
  27302. .globl _sp_3072_cond_add_24
  27303. .p2align 4
  27304. _sp_3072_cond_add_24:
  27305. #endif /* __APPLE__ */
  27306. subq $0xc0, %rsp /* reserve 192 bytes (24 words) of scratch for the masked copy of b */
  27307. movq $0x00, %rax /* rax = 0; it collects the final carry at the end */
  27308. movq (%rdx), %r8 /* phase 1: copy b[i] & m to the stack, two words per step */
  27309. movq 8(%rdx), %r9
  27310. andq %rcx, %r8
  27311. andq %rcx, %r9
  27312. movq %r8, (%rsp)
  27313. movq %r9, 8(%rsp)
  27314. movq 16(%rdx), %r8
  27315. movq 24(%rdx), %r9
  27316. andq %rcx, %r8
  27317. andq %rcx, %r9
  27318. movq %r8, 16(%rsp)
  27319. movq %r9, 24(%rsp)
  27320. movq 32(%rdx), %r8
  27321. movq 40(%rdx), %r9
  27322. andq %rcx, %r8
  27323. andq %rcx, %r9
  27324. movq %r8, 32(%rsp)
  27325. movq %r9, 40(%rsp)
  27326. movq 48(%rdx), %r8
  27327. movq 56(%rdx), %r9
  27328. andq %rcx, %r8
  27329. andq %rcx, %r9
  27330. movq %r8, 48(%rsp)
  27331. movq %r9, 56(%rsp)
  27332. movq 64(%rdx), %r8
  27333. movq 72(%rdx), %r9
  27334. andq %rcx, %r8
  27335. andq %rcx, %r9
  27336. movq %r8, 64(%rsp)
  27337. movq %r9, 72(%rsp)
  27338. movq 80(%rdx), %r8
  27339. movq 88(%rdx), %r9
  27340. andq %rcx, %r8
  27341. andq %rcx, %r9
  27342. movq %r8, 80(%rsp)
  27343. movq %r9, 88(%rsp)
  27344. movq 96(%rdx), %r8
  27345. movq 104(%rdx), %r9
  27346. andq %rcx, %r8
  27347. andq %rcx, %r9
  27348. movq %r8, 96(%rsp)
  27349. movq %r9, 104(%rsp)
  27350. movq 112(%rdx), %r8
  27351. movq 120(%rdx), %r9
  27352. andq %rcx, %r8
  27353. andq %rcx, %r9
  27354. movq %r8, 112(%rsp)
  27355. movq %r9, 120(%rsp)
  27356. movq 128(%rdx), %r8
  27357. movq 136(%rdx), %r9
  27358. andq %rcx, %r8
  27359. andq %rcx, %r9
  27360. movq %r8, 128(%rsp)
  27361. movq %r9, 136(%rsp)
  27362. movq 144(%rdx), %r8
  27363. movq 152(%rdx), %r9
  27364. andq %rcx, %r8
  27365. andq %rcx, %r9
  27366. movq %r8, 144(%rsp)
  27367. movq %r9, 152(%rsp)
  27368. movq 160(%rdx), %r8
  27369. movq 168(%rdx), %r9
  27370. andq %rcx, %r8
  27371. andq %rcx, %r9
  27372. movq %r8, 160(%rsp)
  27373. movq %r9, 168(%rsp)
  27374. movq 176(%rdx), %r8
  27375. movq 184(%rdx), %r9
  27376. andq %rcx, %r8
  27377. andq %rcx, %r9
  27378. movq %r8, 176(%rsp)
  27379. movq %r9, 184(%rsp)
  27380. movq (%rsi), %r8 /* phase 2: r[i] = a[i] + masked b[i] with one carry chain; r8 and r9 alternate */
  27381. movq (%rsp), %rdx /* the b pointer is no longer needed, so rdx is reused for the masked word */
  27382. addq %rdx, %r8 /* word 0 starts the carry chain */
  27383. movq 8(%rsi), %r9
  27384. movq 8(%rsp), %rdx
  27385. adcq %rdx, %r9 /* word 1 plus carry; only mov and adc follow until the end, so CF survives between words */
  27386. movq %r8, (%rdi)
  27387. movq 16(%rsi), %r8
  27388. movq 16(%rsp), %rdx
  27389. adcq %rdx, %r8
  27390. movq %r9, 8(%rdi)
  27391. movq 24(%rsi), %r9
  27392. movq 24(%rsp), %rdx
  27393. adcq %rdx, %r9
  27394. movq %r8, 16(%rdi)
  27395. movq 32(%rsi), %r8
  27396. movq 32(%rsp), %rdx
  27397. adcq %rdx, %r8
  27398. movq %r9, 24(%rdi)
  27399. movq 40(%rsi), %r9
  27400. movq 40(%rsp), %rdx
  27401. adcq %rdx, %r9
  27402. movq %r8, 32(%rdi)
  27403. movq 48(%rsi), %r8
  27404. movq 48(%rsp), %rdx
  27405. adcq %rdx, %r8
  27406. movq %r9, 40(%rdi)
  27407. movq 56(%rsi), %r9
  27408. movq 56(%rsp), %rdx
  27409. adcq %rdx, %r9
  27410. movq %r8, 48(%rdi)
  27411. movq 64(%rsi), %r8
  27412. movq 64(%rsp), %rdx
  27413. adcq %rdx, %r8
  27414. movq %r9, 56(%rdi)
  27415. movq 72(%rsi), %r9
  27416. movq 72(%rsp), %rdx
  27417. adcq %rdx, %r9
  27418. movq %r8, 64(%rdi)
  27419. movq 80(%rsi), %r8
  27420. movq 80(%rsp), %rdx
  27421. adcq %rdx, %r8
  27422. movq %r9, 72(%rdi)
  27423. movq 88(%rsi), %r9
  27424. movq 88(%rsp), %rdx
  27425. adcq %rdx, %r9
  27426. movq %r8, 80(%rdi)
  27427. movq 96(%rsi), %r8
  27428. movq 96(%rsp), %rdx
  27429. adcq %rdx, %r8
  27430. movq %r9, 88(%rdi)
  27431. movq 104(%rsi), %r9
  27432. movq 104(%rsp), %rdx
  27433. adcq %rdx, %r9
  27434. movq %r8, 96(%rdi)
  27435. movq 112(%rsi), %r8
  27436. movq 112(%rsp), %rdx
  27437. adcq %rdx, %r8
  27438. movq %r9, 104(%rdi)
  27439. movq 120(%rsi), %r9
  27440. movq 120(%rsp), %rdx
  27441. adcq %rdx, %r9
  27442. movq %r8, 112(%rdi)
  27443. movq 128(%rsi), %r8
  27444. movq 128(%rsp), %rdx
  27445. adcq %rdx, %r8
  27446. movq %r9, 120(%rdi)
  27447. movq 136(%rsi), %r9
  27448. movq 136(%rsp), %rdx
  27449. adcq %rdx, %r9
  27450. movq %r8, 128(%rdi)
  27451. movq 144(%rsi), %r8
  27452. movq 144(%rsp), %rdx
  27453. adcq %rdx, %r8
  27454. movq %r9, 136(%rdi)
  27455. movq 152(%rsi), %r9
  27456. movq 152(%rsp), %rdx
  27457. adcq %rdx, %r9
  27458. movq %r8, 144(%rdi)
  27459. movq 160(%rsi), %r8
  27460. movq 160(%rsp), %rdx
  27461. adcq %rdx, %r8
  27462. movq %r9, 152(%rdi)
  27463. movq 168(%rsi), %r9
  27464. movq 168(%rsp), %rdx
  27465. adcq %rdx, %r9
  27466. movq %r8, 160(%rdi)
  27467. movq 176(%rsi), %r8
  27468. movq 176(%rsp), %rdx
  27469. adcq %rdx, %r8
  27470. movq %r9, 168(%rdi)
  27471. movq 184(%rsi), %r9
  27472. movq 184(%rsp), %rdx
  27473. adcq %rdx, %r9 /* top word (index 23) */
  27474. movq %r8, 176(%rdi)
  27475. movq %r9, 184(%rdi)
  27476. adcq $0x00, %rax /* rax = 0 + 0 + carry: the return value */
  27477. addq $0xc0, %rsp /* release the scratch area; this rewrites the flags, but the carry is already in rax */
  27478. repz retq
  27479. #ifndef __APPLE__
  27480. .size sp_3072_cond_add_24,.-sp_3072_cond_add_24
  27481. #endif /* __APPLE__ */
  /* Conditionally add b to a under the mask m, storing the sum in r.
   * m is -1 to add and 0 when not.
   *
   * r  (%rdi) Result: 24 64-bit words.
   * a  (%rsi) Number to add to: 24 words.
   * b  (%rdx) Number to add: 24 words, each filtered through m first.
   * m  (%rcx) Mask value to apply.
   *
   * Returns in %rax the carry out of the top word (0 or 1).
   * Scratch registers: %r8, %r9, %r10.
   *
   * Each word of b is masked with pext (all-ones mask keeps the word, zero
   * mask yields zero). pext leaves the flags alone, so the masking can sit
   * between the adc instructions without breaking the carry chain.
   * The store of word i-1 is issued after the loads of word i.
   */
#ifndef __APPLE__
.text
.globl  sp_3072_cond_add_avx2_24
.type   sp_3072_cond_add_avx2_24,@function
.align  16
sp_3072_cond_add_avx2_24:
#else
.section        __TEXT,__text
.globl  _sp_3072_cond_add_avx2_24
.p2align        4
_sp_3072_cond_add_avx2_24:
#endif /* __APPLE__ */
        xorq    %rax, %rax
        /* Word 0 starts the carry chain */
        movq    (%rdx), %r10
        movq    (%rsi), %r8
        pextq   %rcx, %r10, %r10
        addq    %r10, %r8
        /* Word 1 */
        movq    8(%rdx), %r10
        movq    8(%rsi), %r9
        pextq   %rcx, %r10, %r10
        movq    %r8, (%rdi)
        adcq    %r10, %r9
        /* Words 2..23, two per pass; %r8 and %r9 alternate as accumulator */
.irp off, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176
        movq    \off(%rdx), %r10
        movq    \off(%rsi), %r8
        pextq   %rcx, %r10, %r10
        movq    %r9, \off-8(%rdi)
        adcq    %r10, %r8
        movq    \off+8(%rdx), %r10
        movq    \off+8(%rsi), %r9
        pextq   %rcx, %r10, %r10
        movq    %r8, \off(%rdi)
        adcq    %r10, %r9
.endr
        movq    %r9, 184(%rdi)
        /* Fold the final carry into the return value */
        adcq    $0x00, %rax
        repz retq
#ifndef __APPLE__
.size   sp_3072_cond_add_avx2_24,.-sp_3072_cond_add_avx2_24
#endif /* __APPLE__ */
  /* Shift number left by n bits. (r = a << n)
   *
   * r  (%rdi) Result of left shift by n. 49 64-bit words are written:
   *           r[48] receives the bits shifted out of a[47].
   * a  (%rsi) Number to shift: 48 words.
   * n  (%dl)  Amount to shift. It is copied to %cl; shld/shl use the
   *           count modulo 64.
   *
   * Scratch registers: %rax, %rcx, %rdx, %r8, %r9, %r10, %r11.
   * Words are handled from the top down in groups of four. %r9 and %r11
   * take turns holding the lowest word of one group so that it can supply
   * the low bits for the group above it. Every word of a is loaded before
   * the store to the same offset in r.
   */
#ifndef __APPLE__
.text
.globl  sp_3072_lshift_48
.type   sp_3072_lshift_48,@function
.align  16
sp_3072_lshift_48:
#else
.section        __TEXT,__text
.globl  _sp_3072_lshift_48
.p2align        4
_sp_3072_lshift_48:
#endif /* __APPLE__ */
        movb    %dl, %cl
        movq    $0x00, %r10
        /* a[43..47] -> r[44..48] */
        movq    344(%rsi), %r11
        movq    352(%rsi), %rdx
        movq    360(%rsi), %rax
        movq    368(%rsi), %r8
        movq    376(%rsi), %r9
        shldq   %cl, %r9, %r10
        shldq   %cl, %r8, %r9
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r11, %rdx
        movq    %rdx, 352(%rdi)
        movq    %rax, 360(%rdi)
        movq    %r8, 368(%rdi)
        movq    %r9, 376(%rdi)
        movq    %r10, 384(%rdi)
        /* a[39..42] -> r[40..43] */
        movq    312(%rsi), %r9
        movq    320(%rsi), %rdx
        movq    328(%rsi), %rax
        movq    336(%rsi), %r8
        shldq   %cl, %r8, %r11
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r9, %rdx
        movq    %rdx, 320(%rdi)
        movq    %rax, 328(%rdi)
        movq    %r8, 336(%rdi)
        movq    %r11, 344(%rdi)
        /* a[35..38] -> r[36..39] */
        movq    280(%rsi), %r11
        movq    288(%rsi), %rdx
        movq    296(%rsi), %rax
        movq    304(%rsi), %r8
        shldq   %cl, %r8, %r9
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r11, %rdx
        movq    %rdx, 288(%rdi)
        movq    %rax, 296(%rdi)
        movq    %r8, 304(%rdi)
        movq    %r9, 312(%rdi)
        /* a[31..34] -> r[32..35] */
        movq    248(%rsi), %r9
        movq    256(%rsi), %rdx
        movq    264(%rsi), %rax
        movq    272(%rsi), %r8
        shldq   %cl, %r8, %r11
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r9, %rdx
        movq    %rdx, 256(%rdi)
        movq    %rax, 264(%rdi)
        movq    %r8, 272(%rdi)
        movq    %r11, 280(%rdi)
        /* a[27..30] -> r[28..31] */
        movq    216(%rsi), %r11
        movq    224(%rsi), %rdx
        movq    232(%rsi), %rax
        movq    240(%rsi), %r8
        shldq   %cl, %r8, %r9
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r11, %rdx
        movq    %rdx, 224(%rdi)
        movq    %rax, 232(%rdi)
        movq    %r8, 240(%rdi)
        movq    %r9, 248(%rdi)
        /* a[23..26] -> r[24..27] */
        movq    184(%rsi), %r9
        movq    192(%rsi), %rdx
        movq    200(%rsi), %rax
        movq    208(%rsi), %r8
        shldq   %cl, %r8, %r11
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r9, %rdx
        movq    %rdx, 192(%rdi)
        movq    %rax, 200(%rdi)
        movq    %r8, 208(%rdi)
        movq    %r11, 216(%rdi)
        /* a[19..22] -> r[20..23] */
        movq    152(%rsi), %r11
        movq    160(%rsi), %rdx
        movq    168(%rsi), %rax
        movq    176(%rsi), %r8
        shldq   %cl, %r8, %r9
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r11, %rdx
        movq    %rdx, 160(%rdi)
        movq    %rax, 168(%rdi)
        movq    %r8, 176(%rdi)
        movq    %r9, 184(%rdi)
        /* a[15..18] -> r[16..19] */
        movq    120(%rsi), %r9
        movq    128(%rsi), %rdx
        movq    136(%rsi), %rax
        movq    144(%rsi), %r8
        shldq   %cl, %r8, %r11
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r9, %rdx
        movq    %rdx, 128(%rdi)
        movq    %rax, 136(%rdi)
        movq    %r8, 144(%rdi)
        movq    %r11, 152(%rdi)
        /* a[11..14] -> r[12..15] */
        movq    88(%rsi), %r11
        movq    96(%rsi), %rdx
        movq    104(%rsi), %rax
        movq    112(%rsi), %r8
        shldq   %cl, %r8, %r9
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r11, %rdx
        movq    %rdx, 96(%rdi)
        movq    %rax, 104(%rdi)
        movq    %r8, 112(%rdi)
        movq    %r9, 120(%rdi)
        /* a[7..10] -> r[8..11] */
        movq    56(%rsi), %r9
        movq    64(%rsi), %rdx
        movq    72(%rsi), %rax
        movq    80(%rsi), %r8
        shldq   %cl, %r8, %r11
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r9, %rdx
        movq    %rdx, 64(%rdi)
        movq    %rax, 72(%rdi)
        movq    %r8, 80(%rdi)
        movq    %r11, 88(%rdi)
        /* a[3..6] -> r[4..7] */
        movq    24(%rsi), %r11
        movq    32(%rsi), %rdx
        movq    40(%rsi), %rax
        movq    48(%rsi), %r8
        shldq   %cl, %r8, %r9
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shldq   %cl, %r11, %rdx
        movq    %rdx, 32(%rdi)
        movq    %rax, 40(%rdi)
        movq    %r8, 48(%rdi)
        movq    %r9, 56(%rdi)
        /* a[0..2] -> r[0..3]; the bottom word shifts in zeros */
        movq    (%rsi), %rdx
        movq    8(%rsi), %rax
        movq    16(%rsi), %r8
        shldq   %cl, %r8, %r11
        shldq   %cl, %rax, %r8
        shldq   %cl, %rdx, %rax
        shlq    %cl, %rdx
        movq    %rdx, (%rdi)
        movq    %rax, 8(%rdi)
        movq    %r8, 16(%rdi)
        movq    %r11, 24(%rdi)
        repz retq
#ifndef __APPLE__
.size   sp_3072_lshift_48,.-sp_3072_lshift_48
#endif /* __APPLE__ */
  27795. #endif /* !WOLFSSL_SP_NO_3072 */
  27796. #endif /* !WOLFSSL_SP_NO_3072 */
  27797. #ifdef WOLFSSL_SP_4096
  27798. #ifdef WOLFSSL_SP_4096
  /* Read big endian unsigned byte array into r.
   * Uses the bswap instruction.
   *
   * r    (%rdi) A single precision integer, filled from word 0 upwards.
   *             Words not reached by the input are zeroed up to r + 512
   *             bytes.
   * size (%rsi) Maximum number of bytes to convert. This routine never
   *             reads it; the zero fill limit is fixed at r + 0x200.
   * a    (%rdx) Byte array.
   * n    (%rcx) Number of bytes in array to read (compared as signed).
   *             NOTE(review): the copy loops are bounded by n only, so
   *             n > 512 would store past r + 512 - presumably callers
   *             bound n; confirm.
   *
   * Scratch registers: %rax, %r8, %r9, %r10.
   */
#ifndef __APPLE__
.text
.globl  sp_4096_from_bin_bswap
.type   sp_4096_from_bin_bswap,@function
.align  16
sp_4096_from_bin_bswap:
#else
.section        __TEXT,__text
.globl  _sp_4096_from_bin_bswap
.p2align        4
_sp_4096_from_bin_bswap:
#endif /* __APPLE__ */
        leaq    (%rdx,%rcx), %r9        /* one past the last input byte */
        leaq    0x200(%rdi), %r10       /* end of the zero fill */
        /* While 64 or more bytes remain: consume the last 64, emit 8 words */
1:
        cmpq    $63, %rcx
        jle     2f
        subq    $0x40, %r9
.irp off, 0, 16, 32, 48
        movq    56-\off(%r9), %rax
        movq    48-\off(%r9), %r8
        bswapq  %rax
        bswapq  %r8
        movq    %rax, \off(%rdi)
        movq    %r8, \off+8(%rdi)
.endr
        addq    $0x40, %rdi
        subq    $0x40, %rcx
        jmp     1b
        /* While 8 or more bytes remain: consume the last 8, emit 1 word */
2:
        cmpq    $7, %rcx
        jle     3f
        subq    $8, %r9
        movq    (%r9), %rax
        bswapq  %rax
        movq    %rax, (%rdi)
        addq    $8, %rdi
        subq    $8, %rcx
        jmp     2b
3:
        testq   %rcx, %rcx
        jz      5f
        /* Leading bytes of a are folded, first byte highest, into one word */
        xorl    %r8d, %r8d
4:
        movzbl  (%rdx), %eax
        shlq    $8, %r8
        incq    %rdx
        addq    %rax, %r8
        decq    %rcx
        jg      4b
        movq    %r8, (%rdi)
        addq    $8, %rdi
5:
        cmpq    %r10, %rdi
        je      7f
        /* Zero the words from here up to r + 512 */
6:
        movq    $0, (%rdi)
        addq    $8, %rdi
        cmpq    %r10, %rdi
        jl      6b
7:
        repz retq
#ifndef __APPLE__
.size   sp_4096_from_bin_bswap,.-sp_4096_from_bin_bswap
#endif /* __APPLE__ */
  /* Read big endian unsigned byte array into r.
   * Uses the movbe instruction which is an optional instruction.
   *
   * r    (%rdi) A single precision integer, filled from word 0 upwards.
   *             Words not reached by the input are zeroed up to r + 512
   *             bytes.
   * size (%rsi) Maximum number of bytes to convert. This routine never
   *             reads it; the zero fill limit is fixed at r + 0x200.
   * a    (%rdx) Byte array.
   * n    (%rcx) Number of bytes in array to read (compared as signed).
   *             NOTE(review): the copy loops are bounded by n only, so
   *             n > 512 would store past r + 512 - presumably callers
   *             bound n; confirm.
   *
   * Scratch registers: %rax, %r8, %r9, %r10.
   */
#ifndef __APPLE__
.text
.globl  sp_4096_from_bin_movbe
.type   sp_4096_from_bin_movbe,@function
.align  16
sp_4096_from_bin_movbe:
#else
.section        __TEXT,__text
.globl  _sp_4096_from_bin_movbe
.p2align        4
_sp_4096_from_bin_movbe:
#endif /* __APPLE__ */
        leaq    (%rdx,%rcx), %r9        /* one past the last input byte */
        leaq    0x200(%rdi), %r10       /* end of the zero fill */
        /* While 64 or more bytes remain: consume the last 64, emit 8 words */
1:
        cmpq    $63, %rcx
        jle     2f
        subq    $0x40, %r9
.irp off, 0, 16, 32, 48
        movbeq  56-\off(%r9), %rax
        movbeq  48-\off(%r9), %r8
        movq    %rax, \off(%rdi)
        movq    %r8, \off+8(%rdi)
.endr
        addq    $0x40, %rdi
        subq    $0x40, %rcx
        jmp     1b
        /* While 8 or more bytes remain: consume the last 8, emit 1 word */
2:
        cmpq    $7, %rcx
        jle     3f
        subq    $8, %r9
        movbeq  (%r9), %rax
        movq    %rax, (%rdi)
        addq    $8, %rdi
        subq    $8, %rcx
        jmp     2b
3:
        testq   %rcx, %rcx
        jz      5f
        /* Leading bytes of a are folded, first byte highest, into one word */
        xorl    %r8d, %r8d
4:
        movzbl  (%rdx), %eax
        shlq    $8, %r8
        incq    %rdx
        addq    %rax, %r8
        decq    %rcx
        jg      4b
        movq    %r8, (%rdi)
        addq    $8, %rdi
5:
        cmpq    %r10, %rdi
        je      7f
        /* Zero the words from here up to r + 512 */
6:
        movq    $0, (%rdi)
        addq    $8, %rdi
        cmpq    %r10, %rdi
        jl      6b
7:
        repz retq
#ifndef __APPLE__
.size   sp_4096_from_bin_movbe,.-sp_4096_from_bin_movbe
#endif /* __APPLE__ */
  /* Write r as big endian to byte array.
   * Fixed length number of bytes written: 512
   * Uses the bswap instruction.
   *
   * r (%rdi) A single precision integer: 64 64-bit words, read from the
   *          word at offset 504 down to the word at offset 0.
   * a (%rsi) Byte array; bytes 0..511 are written.
   *
   * Scratch registers: %rax, %rdx.
   */
#ifndef __APPLE__
.text
.globl  sp_4096_to_bin_bswap
.type   sp_4096_to_bin_bswap,@function
.align  16
sp_4096_to_bin_bswap:
#else
.section        __TEXT,__text
.globl  _sp_4096_to_bin_bswap
.p2align        4
_sp_4096_to_bin_bswap:
#endif /* __APPLE__ */
        /* Two words per pass: highest words of r go to the lowest bytes of a */
.irp off, 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496
        movq    504-\off(%rdi), %rdx
        movq    496-\off(%rdi), %rax
        bswapq  %rdx
        bswapq  %rax
        movq    %rdx, \off(%rsi)
        movq    %rax, \off+8(%rsi)
.endr
        repz retq
#ifndef __APPLE__
.size   sp_4096_to_bin_bswap,.-sp_4096_to_bin_bswap
#endif /* __APPLE__ */
  /* Write r as big endian to byte array.
   * Fixed length number of bytes written: 512
   * Uses the movbe instruction which is optional.
   *
   * r (%rdi) A single precision integer: 64 64-bit words, read from the
   *          word at offset 504 down to the word at offset 0.
   * a (%rsi) Byte array; bytes 0..511 are written.
   *
   * Scratch registers: %rax, %rdx.
   */
#ifndef __APPLE__
.text
.globl  sp_4096_to_bin_movbe
.type   sp_4096_to_bin_movbe,@function
.align  16
sp_4096_to_bin_movbe:
#else
.section        __TEXT,__text
.globl  _sp_4096_to_bin_movbe
.p2align        4
_sp_4096_to_bin_movbe:
#endif /* __APPLE__ */
        /* Two words per pass: highest words of r go to the lowest bytes of a */
.irp off, 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496
        movbeq  504-\off(%rdi), %rdx
        movbeq  496-\off(%rdi), %rax
        movq    %rdx, \off(%rsi)
        movq    %rax, \off+8(%rsi)
.endr
        repz retq
#ifndef __APPLE__
.size   sp_4096_to_bin_movbe,.-sp_4096_to_bin_movbe
#endif /* __APPLE__ */
  /* Sub b from a into a. (a -= b)
   *
   * a (%rdi) A single precision integer and result: 64 64-bit words.
   * b (%rsi) A single precision integer: 64 words.
   *
   * Returns in %rax 0 when there is no borrow out of the top word and
   * -1 (all bits set) when there is.
   * Scratch registers: %rcx, %rdx.
   *
   * %rdx and %rcx alternate as the working word; the store of word i-1
   * is issued after the load of word i, and only mov instructions sit
   * between the sbb instructions so the borrow flag is carried through.
   */
#ifndef __APPLE__
.text
.globl  sp_4096_sub_in_place_64
.type   sp_4096_sub_in_place_64,@function
.align  16
sp_4096_sub_in_place_64:
#else
.section        __TEXT,__text
.globl  _sp_4096_sub_in_place_64
.p2align        4
_sp_4096_sub_in_place_64:
#endif /* __APPLE__ */
        /* Words 0 and 1 start the borrow chain */
        movq    (%rdi), %rdx
        xorq    %rax, %rax
        subq    (%rsi), %rdx
        movq    8(%rdi), %rcx
        movq    %rdx, (%rdi)
        sbbq    8(%rsi), %rcx
        /* Words 2..63, two per pass */
.irp off, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496
        movq    \off(%rdi), %rdx
        movq    %rcx, \off-8(%rdi)
        sbbq    \off(%rsi), %rdx
        movq    \off+8(%rdi), %rcx
        movq    %rdx, \off(%rdi)
        sbbq    \off+8(%rsi), %rcx
.endr
        movq    %rcx, 504(%rdi)
        /* 0 - borrow: 0 or all ones */
        sbbq    $0x00, %rax
        repz retq
#ifndef __APPLE__
.size   sp_4096_sub_in_place_64,.-sp_4096_sub_in_place_64
#endif /* __APPLE__ */
  28559. /* Add b to a into r. (r = a + b)
  28560. *
  * Fully unrolled, branch-free addition of 64 quadwords: words are read at
  * byte offsets 0..504 from a and from b, and the sums are written at the
  * same offsets in r (64 x 64 bits = 4096 bits). The carry out of the top
  * word is returned in %rax (0 or 1).
  *
  * Registers as used by the code below: %rdi = r, %rsi = a, %rdx = b
  * (this matches System V AMD64 argument order). %rcx and %r8 are used
  * alternately as scratch; the flags are overwritten.
  *
  28561. * r A single precision integer. Receives the 64-word sum.
  28562. * a A single precision integer. First operand; 64 words are read.
  28563. * b A single precision integer. Second operand; 64 words are read.
  28564. */
  28565. #ifndef __APPLE__
  /* Non-Apple targets: plain symbol name, marked as a function, 16-byte aligned. */
  28566. .text
  28567. .globl sp_4096_add_64
  28568. .type sp_4096_add_64,@function
  28569. .align 16
  28570. sp_4096_add_64:
  28571. #else
  /* Apple targets: symbol name carries a leading underscore, 2^4-byte aligned. */
  28572. .section __TEXT,__text
  28573. .globl _sp_4096_add_64
  28574. .p2align 4
  28575. _sp_4096_add_64:
  28576. #endif /* __APPLE__ */
  28577. # Add
  28578. movq (%rsi), %rcx /* rcx = a[0] */
  28579. xorq %rax, %rax /* rax = 0 (carry result); done before addq because xor rewrites the flags */
  28580. addq (%rdx), %rcx /* rcx = a[0] + b[0]; CF now holds the carry for word 1 */
  28581. movq 8(%rsi), %r8 /* load a[1] into the other scratch register; movq leaves CF intact */
  28582. movq %rcx, (%rdi) /* r[0] = sum of word 0 */
  28583. adcq 8(%rdx), %r8 /* r8 = a[1] + b[1] + CF */
  /* The same three-step pattern (load a[i], store the previous sum, adcq b[i])
   * repeats for words 2..63, alternating between %rcx and %r8. Only movq sits
   * between consecutive adcq instructions, so the carry chain stays in CF. */
  28584. movq 16(%rsi), %rcx
  28585. movq %r8, 8(%rdi)
  28586. adcq 16(%rdx), %rcx
  28587. movq 24(%rsi), %r8
  28588. movq %rcx, 16(%rdi)
  28589. adcq 24(%rdx), %r8
  28590. movq 32(%rsi), %rcx
  28591. movq %r8, 24(%rdi)
  28592. adcq 32(%rdx), %rcx
  28593. movq 40(%rsi), %r8
  28594. movq %rcx, 32(%rdi)
  28595. adcq 40(%rdx), %r8
  28596. movq 48(%rsi), %rcx
  28597. movq %r8, 40(%rdi)
  28598. adcq 48(%rdx), %rcx
  28599. movq 56(%rsi), %r8
  28600. movq %rcx, 48(%rdi)
  28601. adcq 56(%rdx), %r8
  28602. movq 64(%rsi), %rcx
  28603. movq %r8, 56(%rdi)
  28604. adcq 64(%rdx), %rcx
  28605. movq 72(%rsi), %r8
  28606. movq %rcx, 64(%rdi)
  28607. adcq 72(%rdx), %r8
  28608. movq 80(%rsi), %rcx
  28609. movq %r8, 72(%rdi)
  28610. adcq 80(%rdx), %rcx
  28611. movq 88(%rsi), %r8
  28612. movq %rcx, 80(%rdi)
  28613. adcq 88(%rdx), %r8
  28614. movq 96(%rsi), %rcx
  28615. movq %r8, 88(%rdi)
  28616. adcq 96(%rdx), %rcx
  28617. movq 104(%rsi), %r8
  28618. movq %rcx, 96(%rdi)
  28619. adcq 104(%rdx), %r8
  28620. movq 112(%rsi), %rcx
  28621. movq %r8, 104(%rdi)
  28622. adcq 112(%rdx), %rcx
  28623. movq 120(%rsi), %r8
  28624. movq %rcx, 112(%rdi)
  28625. adcq 120(%rdx), %r8
  28626. movq 128(%rsi), %rcx
  28627. movq %r8, 120(%rdi)
  28628. adcq 128(%rdx), %rcx
  28629. movq 136(%rsi), %r8
  28630. movq %rcx, 128(%rdi)
  28631. adcq 136(%rdx), %r8
  28632. movq 144(%rsi), %rcx
  28633. movq %r8, 136(%rdi)
  28634. adcq 144(%rdx), %rcx
  28635. movq 152(%rsi), %r8
  28636. movq %rcx, 144(%rdi)
  28637. adcq 152(%rdx), %r8
  28638. movq 160(%rsi), %rcx
  28639. movq %r8, 152(%rdi)
  28640. adcq 160(%rdx), %rcx
  28641. movq 168(%rsi), %r8
  28642. movq %rcx, 160(%rdi)
  28643. adcq 168(%rdx), %r8
  28644. movq 176(%rsi), %rcx
  28645. movq %r8, 168(%rdi)
  28646. adcq 176(%rdx), %rcx
  28647. movq 184(%rsi), %r8
  28648. movq %rcx, 176(%rdi)
  28649. adcq 184(%rdx), %r8
  28650. movq 192(%rsi), %rcx
  28651. movq %r8, 184(%rdi)
  28652. adcq 192(%rdx), %rcx
  28653. movq 200(%rsi), %r8
  28654. movq %rcx, 192(%rdi)
  28655. adcq 200(%rdx), %r8
  28656. movq 208(%rsi), %rcx
  28657. movq %r8, 200(%rdi)
  28658. adcq 208(%rdx), %rcx
  28659. movq 216(%rsi), %r8
  28660. movq %rcx, 208(%rdi)
  28661. adcq 216(%rdx), %r8
  28662. movq 224(%rsi), %rcx
  28663. movq %r8, 216(%rdi)
  28664. adcq 224(%rdx), %rcx
  28665. movq 232(%rsi), %r8
  28666. movq %rcx, 224(%rdi)
  28667. adcq 232(%rdx), %r8
  28668. movq 240(%rsi), %rcx
  28669. movq %r8, 232(%rdi)
  28670. adcq 240(%rdx), %rcx
  28671. movq 248(%rsi), %r8
  28672. movq %rcx, 240(%rdi)
  28673. adcq 248(%rdx), %r8
  28674. movq 256(%rsi), %rcx
  28675. movq %r8, 248(%rdi)
  28676. adcq 256(%rdx), %rcx
  28677. movq 264(%rsi), %r8
  28678. movq %rcx, 256(%rdi)
  28679. adcq 264(%rdx), %r8
  28680. movq 272(%rsi), %rcx
  28681. movq %r8, 264(%rdi)
  28682. adcq 272(%rdx), %rcx
  28683. movq 280(%rsi), %r8
  28684. movq %rcx, 272(%rdi)
  28685. adcq 280(%rdx), %r8
  28686. movq 288(%rsi), %rcx
  28687. movq %r8, 280(%rdi)
  28688. adcq 288(%rdx), %rcx
  28689. movq 296(%rsi), %r8
  28690. movq %rcx, 288(%rdi)
  28691. adcq 296(%rdx), %r8
  28692. movq 304(%rsi), %rcx
  28693. movq %r8, 296(%rdi)
  28694. adcq 304(%rdx), %rcx
  28695. movq 312(%rsi), %r8
  28696. movq %rcx, 304(%rdi)
  28697. adcq 312(%rdx), %r8
  28698. movq 320(%rsi), %rcx
  28699. movq %r8, 312(%rdi)
  28700. adcq 320(%rdx), %rcx
  28701. movq 328(%rsi), %r8
  28702. movq %rcx, 320(%rdi)
  28703. adcq 328(%rdx), %r8
  28704. movq 336(%rsi), %rcx
  28705. movq %r8, 328(%rdi)
  28706. adcq 336(%rdx), %rcx
  28707. movq 344(%rsi), %r8
  28708. movq %rcx, 336(%rdi)
  28709. adcq 344(%rdx), %r8
  28710. movq 352(%rsi), %rcx
  28711. movq %r8, 344(%rdi)
  28712. adcq 352(%rdx), %rcx
  28713. movq 360(%rsi), %r8
  28714. movq %rcx, 352(%rdi)
  28715. adcq 360(%rdx), %r8
  28716. movq 368(%rsi), %rcx
  28717. movq %r8, 360(%rdi)
  28718. adcq 368(%rdx), %rcx
  28719. movq 376(%rsi), %r8
  28720. movq %rcx, 368(%rdi)
  28721. adcq 376(%rdx), %r8
  28722. movq 384(%rsi), %rcx
  28723. movq %r8, 376(%rdi)
  28724. adcq 384(%rdx), %rcx
  28725. movq 392(%rsi), %r8
  28726. movq %rcx, 384(%rdi)
  28727. adcq 392(%rdx), %r8
  28728. movq 400(%rsi), %rcx
  28729. movq %r8, 392(%rdi)
  28730. adcq 400(%rdx), %rcx
  28731. movq 408(%rsi), %r8
  28732. movq %rcx, 400(%rdi)
  28733. adcq 408(%rdx), %r8
  28734. movq 416(%rsi), %rcx
  28735. movq %r8, 408(%rdi)
  28736. adcq 416(%rdx), %rcx
  28737. movq 424(%rsi), %r8
  28738. movq %rcx, 416(%rdi)
  28739. adcq 424(%rdx), %r8
  28740. movq 432(%rsi), %rcx
  28741. movq %r8, 424(%rdi)
  28742. adcq 432(%rdx), %rcx
  28743. movq 440(%rsi), %r8
  28744. movq %rcx, 432(%rdi)
  28745. adcq 440(%rdx), %r8
  28746. movq 448(%rsi), %rcx
  28747. movq %r8, 440(%rdi)
  28748. adcq 448(%rdx), %rcx
  28749. movq 456(%rsi), %r8
  28750. movq %rcx, 448(%rdi)
  28751. adcq 456(%rdx), %r8
  28752. movq 464(%rsi), %rcx
  28753. movq %r8, 456(%rdi)
  28754. adcq 464(%rdx), %rcx
  28755. movq 472(%rsi), %r8
  28756. movq %rcx, 464(%rdi)
  28757. adcq 472(%rdx), %r8
  28758. movq 480(%rsi), %rcx
  28759. movq %r8, 472(%rdi)
  28760. adcq 480(%rdx), %rcx
  28761. movq 488(%rsi), %r8
  28762. movq %rcx, 480(%rdi)
  28763. adcq 488(%rdx), %r8
  28764. movq 496(%rsi), %rcx
  28765. movq %r8, 488(%rdi)
  28766. adcq 496(%rdx), %rcx
  28767. movq 504(%rsi), %r8
  28768. movq %rcx, 496(%rdi)
  28769. adcq 504(%rdx), %r8
  28770. movq %r8, 504(%rdi) /* r[63] = sum of the top word */
  28771. adcq $0x00, %rax /* rax = 0 + 0 + CF: carry out of the top word (0 or 1) */
  28772. repz retq /* return; NOTE(review): the rep prefix on ret is presumably the usual branch-predictor padding idiom - confirm */
  28773. #ifndef __APPLE__
  /* Record the symbol's size; only emitted for non-Apple targets. */
  28774. .size sp_4096_add_64,.-sp_4096_add_64
  28775. #endif /* __APPLE__ */
  28776. /* Multiply a and b into r. (r = a * b)
  28777. *
  28778. * r A single precision integer.
  28779. * a A single precision integer.
  28780. * b A single precision integer.
  28781. */
  28782. #ifndef __APPLE__
  28783. .text
  28784. .globl sp_4096_mul_64
  28785. .type sp_4096_mul_64,@function
  28786. .align 16
  28787. sp_4096_mul_64:
  28788. #else
  28789. .section __TEXT,__text
  28790. .globl _sp_4096_mul_64
  28791. .p2align 4
  28792. _sp_4096_mul_64:
  28793. #endif /* __APPLE__ */
  28794. pushq %r12
  28795. pushq %r13
  28796. pushq %r14
  28797. pushq %r15
  28798. subq $0x628, %rsp
  28799. movq %rdi, 1536(%rsp)
  28800. movq %rsi, 1544(%rsp)
  28801. movq %rdx, 1552(%rsp)
  28802. leaq 1024(%rsp), %r10
  28803. leaq 256(%rsi), %r12
  28804. # Add
  28805. movq (%rsi), %rax
  28806. xorq %r13, %r13
  28807. addq (%r12), %rax
  28808. movq 8(%rsi), %rcx
  28809. movq %rax, (%r10)
  28810. adcq 8(%r12), %rcx
  28811. movq 16(%rsi), %r8
  28812. movq %rcx, 8(%r10)
  28813. adcq 16(%r12), %r8
  28814. movq 24(%rsi), %rax
  28815. movq %r8, 16(%r10)
  28816. adcq 24(%r12), %rax
  28817. movq 32(%rsi), %rcx
  28818. movq %rax, 24(%r10)
  28819. adcq 32(%r12), %rcx
  28820. movq 40(%rsi), %r8
  28821. movq %rcx, 32(%r10)
  28822. adcq 40(%r12), %r8
  28823. movq 48(%rsi), %rax
  28824. movq %r8, 40(%r10)
  28825. adcq 48(%r12), %rax
  28826. movq 56(%rsi), %rcx
  28827. movq %rax, 48(%r10)
  28828. adcq 56(%r12), %rcx
  28829. movq 64(%rsi), %r8
  28830. movq %rcx, 56(%r10)
  28831. adcq 64(%r12), %r8
  28832. movq 72(%rsi), %rax
  28833. movq %r8, 64(%r10)
  28834. adcq 72(%r12), %rax
  28835. movq 80(%rsi), %rcx
  28836. movq %rax, 72(%r10)
  28837. adcq 80(%r12), %rcx
  28838. movq 88(%rsi), %r8
  28839. movq %rcx, 80(%r10)
  28840. adcq 88(%r12), %r8
  28841. movq 96(%rsi), %rax
  28842. movq %r8, 88(%r10)
  28843. adcq 96(%r12), %rax
  28844. movq 104(%rsi), %rcx
  28845. movq %rax, 96(%r10)
  28846. adcq 104(%r12), %rcx
  28847. movq 112(%rsi), %r8
  28848. movq %rcx, 104(%r10)
  28849. adcq 112(%r12), %r8
  28850. movq 120(%rsi), %rax
  28851. movq %r8, 112(%r10)
  28852. adcq 120(%r12), %rax
  28853. movq 128(%rsi), %rcx
  28854. movq %rax, 120(%r10)
  28855. adcq 128(%r12), %rcx
  28856. movq 136(%rsi), %r8
  28857. movq %rcx, 128(%r10)
  28858. adcq 136(%r12), %r8
  28859. movq 144(%rsi), %rax
  28860. movq %r8, 136(%r10)
  28861. adcq 144(%r12), %rax
  28862. movq 152(%rsi), %rcx
  28863. movq %rax, 144(%r10)
  28864. adcq 152(%r12), %rcx
  28865. movq 160(%rsi), %r8
  28866. movq %rcx, 152(%r10)
  28867. adcq 160(%r12), %r8
  28868. movq 168(%rsi), %rax
  28869. movq %r8, 160(%r10)
  28870. adcq 168(%r12), %rax
  28871. movq 176(%rsi), %rcx
  28872. movq %rax, 168(%r10)
  28873. adcq 176(%r12), %rcx
  28874. movq 184(%rsi), %r8
  28875. movq %rcx, 176(%r10)
  28876. adcq 184(%r12), %r8
  28877. movq 192(%rsi), %rax
  28878. movq %r8, 184(%r10)
  28879. adcq 192(%r12), %rax
  28880. movq 200(%rsi), %rcx
  28881. movq %rax, 192(%r10)
  28882. adcq 200(%r12), %rcx
  28883. movq 208(%rsi), %r8
  28884. movq %rcx, 200(%r10)
  28885. adcq 208(%r12), %r8
  28886. movq 216(%rsi), %rax
  28887. movq %r8, 208(%r10)
  28888. adcq 216(%r12), %rax
  28889. movq 224(%rsi), %rcx
  28890. movq %rax, 216(%r10)
  28891. adcq 224(%r12), %rcx
  28892. movq 232(%rsi), %r8
  28893. movq %rcx, 224(%r10)
  28894. adcq 232(%r12), %r8
  28895. movq 240(%rsi), %rax
  28896. movq %r8, 232(%r10)
  28897. adcq 240(%r12), %rax
  28898. movq 248(%rsi), %rcx
  28899. movq %rax, 240(%r10)
  28900. adcq 248(%r12), %rcx
  28901. movq %rcx, 248(%r10)
  28902. adcq $0x00, %r13
  28903. movq %r13, 1560(%rsp)
  28904. leaq 1280(%rsp), %r11
  28905. leaq 256(%rdx), %r12
  28906. # Add
  28907. movq (%rdx), %rax
  28908. xorq %r14, %r14
  28909. addq (%r12), %rax
  28910. movq 8(%rdx), %rcx
  28911. movq %rax, (%r11)
  28912. adcq 8(%r12), %rcx
  28913. movq 16(%rdx), %r8
  28914. movq %rcx, 8(%r11)
  28915. adcq 16(%r12), %r8
  28916. movq 24(%rdx), %rax
  28917. movq %r8, 16(%r11)
  28918. adcq 24(%r12), %rax
  28919. movq 32(%rdx), %rcx
  28920. movq %rax, 24(%r11)
  28921. adcq 32(%r12), %rcx
  28922. movq 40(%rdx), %r8
  28923. movq %rcx, 32(%r11)
  28924. adcq 40(%r12), %r8
  28925. movq 48(%rdx), %rax
  28926. movq %r8, 40(%r11)
  28927. adcq 48(%r12), %rax
  28928. movq 56(%rdx), %rcx
  28929. movq %rax, 48(%r11)
  28930. adcq 56(%r12), %rcx
  28931. movq 64(%rdx), %r8
  28932. movq %rcx, 56(%r11)
  28933. adcq 64(%r12), %r8
  28934. movq 72(%rdx), %rax
  28935. movq %r8, 64(%r11)
  28936. adcq 72(%r12), %rax
  28937. movq 80(%rdx), %rcx
  28938. movq %rax, 72(%r11)
  28939. adcq 80(%r12), %rcx
  28940. movq 88(%rdx), %r8
  28941. movq %rcx, 80(%r11)
  28942. adcq 88(%r12), %r8
  28943. movq 96(%rdx), %rax
  28944. movq %r8, 88(%r11)
  28945. adcq 96(%r12), %rax
  28946. movq 104(%rdx), %rcx
  28947. movq %rax, 96(%r11)
  28948. adcq 104(%r12), %rcx
  28949. movq 112(%rdx), %r8
  28950. movq %rcx, 104(%r11)
  28951. adcq 112(%r12), %r8
  28952. movq 120(%rdx), %rax
  28953. movq %r8, 112(%r11)
  28954. adcq 120(%r12), %rax
  28955. movq 128(%rdx), %rcx
  28956. movq %rax, 120(%r11)
  28957. adcq 128(%r12), %rcx
  28958. movq 136(%rdx), %r8
  28959. movq %rcx, 128(%r11)
  28960. adcq 136(%r12), %r8
  28961. movq 144(%rdx), %rax
  28962. movq %r8, 136(%r11)
  28963. adcq 144(%r12), %rax
  28964. movq 152(%rdx), %rcx
  28965. movq %rax, 144(%r11)
  28966. adcq 152(%r12), %rcx
  28967. movq 160(%rdx), %r8
  28968. movq %rcx, 152(%r11)
  28969. adcq 160(%r12), %r8
  28970. movq 168(%rdx), %rax
  28971. movq %r8, 160(%r11)
  28972. adcq 168(%r12), %rax
  28973. movq 176(%rdx), %rcx
  28974. movq %rax, 168(%r11)
  28975. adcq 176(%r12), %rcx
  28976. movq 184(%rdx), %r8
  28977. movq %rcx, 176(%r11)
  28978. adcq 184(%r12), %r8
  28979. movq 192(%rdx), %rax
  28980. movq %r8, 184(%r11)
  28981. adcq 192(%r12), %rax
  28982. movq 200(%rdx), %rcx
  28983. movq %rax, 192(%r11)
  28984. adcq 200(%r12), %rcx
  28985. movq 208(%rdx), %r8
  28986. movq %rcx, 200(%r11)
  28987. adcq 208(%r12), %r8
  28988. movq 216(%rdx), %rax
  28989. movq %r8, 208(%r11)
  28990. adcq 216(%r12), %rax
  28991. movq 224(%rdx), %rcx
  28992. movq %rax, 216(%r11)
  28993. adcq 224(%r12), %rcx
  28994. movq 232(%rdx), %r8
  28995. movq %rcx, 224(%r11)
  28996. adcq 232(%r12), %r8
  28997. movq 240(%rdx), %rax
  28998. movq %r8, 232(%r11)
  28999. adcq 240(%r12), %rax
  29000. movq 248(%rdx), %rcx
  29001. movq %rax, 240(%r11)
  29002. adcq 248(%r12), %rcx
  29003. movq %rcx, 248(%r11)
  29004. adcq $0x00, %r14
  29005. movq %r14, 1568(%rsp)
  29006. movq %r11, %rdx
  29007. movq %r10, %rsi
  29008. movq %rsp, %rdi
  29009. #ifndef __APPLE__
  29010. callq sp_2048_mul_32@plt
  29011. #else
  29012. callq _sp_2048_mul_32
  29013. #endif /* __APPLE__ */
  29014. movq 1552(%rsp), %rdx
  29015. movq 1544(%rsp), %rsi
  29016. leaq 512(%rsp), %rdi
  29017. addq $0x100, %rdx
  29018. addq $0x100, %rsi
  29019. #ifndef __APPLE__
  29020. callq sp_2048_mul_32@plt
  29021. #else
  29022. callq _sp_2048_mul_32
  29023. #endif /* __APPLE__ */
  29024. movq 1552(%rsp), %rdx
  29025. movq 1544(%rsp), %rsi
  29026. movq 1536(%rsp), %rdi
  29027. #ifndef __APPLE__
  29028. callq sp_2048_mul_32@plt
  29029. #else
  29030. callq _sp_2048_mul_32
  29031. #endif /* __APPLE__ */
  29032. movq 1560(%rsp), %r13
  29033. movq 1568(%rsp), %r14
  29034. movq 1536(%rsp), %r15
  29035. movq %r13, %r9
  29036. leaq 1024(%rsp), %r10
  29037. leaq 1280(%rsp), %r11
  29038. andq %r14, %r9
  29039. negq %r13
  29040. negq %r14
  29041. addq $0x200, %r15
  29042. movq (%r10), %rax
  29043. movq (%r11), %rcx
  29044. andq %r14, %rax
  29045. andq %r13, %rcx
  29046. movq %rax, (%r10)
  29047. movq %rcx, (%r11)
  29048. movq 8(%r10), %rax
  29049. movq 8(%r11), %rcx
  29050. andq %r14, %rax
  29051. andq %r13, %rcx
  29052. movq %rax, 8(%r10)
  29053. movq %rcx, 8(%r11)
  29054. movq 16(%r10), %rax
  29055. movq 16(%r11), %rcx
  29056. andq %r14, %rax
  29057. andq %r13, %rcx
  29058. movq %rax, 16(%r10)
  29059. movq %rcx, 16(%r11)
  29060. movq 24(%r10), %rax
  29061. movq 24(%r11), %rcx
  29062. andq %r14, %rax
  29063. andq %r13, %rcx
  29064. movq %rax, 24(%r10)
  29065. movq %rcx, 24(%r11)
  29066. movq 32(%r10), %rax
  29067. movq 32(%r11), %rcx
  29068. andq %r14, %rax
  29069. andq %r13, %rcx
  29070. movq %rax, 32(%r10)
  29071. movq %rcx, 32(%r11)
  29072. movq 40(%r10), %rax
  29073. movq 40(%r11), %rcx
  29074. andq %r14, %rax
  29075. andq %r13, %rcx
  29076. movq %rax, 40(%r10)
  29077. movq %rcx, 40(%r11)
  29078. movq 48(%r10), %rax
  29079. movq 48(%r11), %rcx
  29080. andq %r14, %rax
  29081. andq %r13, %rcx
  29082. movq %rax, 48(%r10)
  29083. movq %rcx, 48(%r11)
  29084. movq 56(%r10), %rax
  29085. movq 56(%r11), %rcx
  29086. andq %r14, %rax
  29087. andq %r13, %rcx
  29088. movq %rax, 56(%r10)
  29089. movq %rcx, 56(%r11)
  29090. movq 64(%r10), %rax
  29091. movq 64(%r11), %rcx
  29092. andq %r14, %rax
  29093. andq %r13, %rcx
  29094. movq %rax, 64(%r10)
  29095. movq %rcx, 64(%r11)
  29096. movq 72(%r10), %rax
  29097. movq 72(%r11), %rcx
  29098. andq %r14, %rax
  29099. andq %r13, %rcx
  29100. movq %rax, 72(%r10)
  29101. movq %rcx, 72(%r11)
  29102. movq 80(%r10), %rax
  29103. movq 80(%r11), %rcx
  29104. andq %r14, %rax
  29105. andq %r13, %rcx
  29106. movq %rax, 80(%r10)
  29107. movq %rcx, 80(%r11)
  29108. movq 88(%r10), %rax
  29109. movq 88(%r11), %rcx
  29110. andq %r14, %rax
  29111. andq %r13, %rcx
  29112. movq %rax, 88(%r10)
  29113. movq %rcx, 88(%r11)
  29114. movq 96(%r10), %rax
  29115. movq 96(%r11), %rcx
  29116. andq %r14, %rax
  29117. andq %r13, %rcx
  29118. movq %rax, 96(%r10)
  29119. movq %rcx, 96(%r11)
  29120. movq 104(%r10), %rax
  29121. movq 104(%r11), %rcx
  29122. andq %r14, %rax
  29123. andq %r13, %rcx
  29124. movq %rax, 104(%r10)
  29125. movq %rcx, 104(%r11)
  29126. movq 112(%r10), %rax
  29127. movq 112(%r11), %rcx
  29128. andq %r14, %rax
  29129. andq %r13, %rcx
  29130. movq %rax, 112(%r10)
  29131. movq %rcx, 112(%r11)
  29132. movq 120(%r10), %rax
  29133. movq 120(%r11), %rcx
  29134. andq %r14, %rax
  29135. andq %r13, %rcx
  29136. movq %rax, 120(%r10)
  29137. movq %rcx, 120(%r11)
  29138. movq 128(%r10), %rax
  29139. movq 128(%r11), %rcx
  29140. andq %r14, %rax
  29141. andq %r13, %rcx
  29142. movq %rax, 128(%r10)
  29143. movq %rcx, 128(%r11)
  29144. movq 136(%r10), %rax
  29145. movq 136(%r11), %rcx
  29146. andq %r14, %rax
  29147. andq %r13, %rcx
  29148. movq %rax, 136(%r10)
  29149. movq %rcx, 136(%r11)
  29150. movq 144(%r10), %rax
  29151. movq 144(%r11), %rcx
  29152. andq %r14, %rax
  29153. andq %r13, %rcx
  29154. movq %rax, 144(%r10)
  29155. movq %rcx, 144(%r11)
  29156. movq 152(%r10), %rax
  29157. movq 152(%r11), %rcx
  29158. andq %r14, %rax
  29159. andq %r13, %rcx
  29160. movq %rax, 152(%r10)
  29161. movq %rcx, 152(%r11)
  29162. movq 160(%r10), %rax
  29163. movq 160(%r11), %rcx
  29164. andq %r14, %rax
  29165. andq %r13, %rcx
  29166. movq %rax, 160(%r10)
  29167. movq %rcx, 160(%r11)
  29168. movq 168(%r10), %rax
  29169. movq 168(%r11), %rcx
  29170. andq %r14, %rax
  29171. andq %r13, %rcx
  29172. movq %rax, 168(%r10)
  29173. movq %rcx, 168(%r11)
  29174. movq 176(%r10), %rax
  29175. movq 176(%r11), %rcx
  29176. andq %r14, %rax
  29177. andq %r13, %rcx
  29178. movq %rax, 176(%r10)
  29179. movq %rcx, 176(%r11)
  29180. movq 184(%r10), %rax
  29181. movq 184(%r11), %rcx
  29182. andq %r14, %rax
  29183. andq %r13, %rcx
  29184. movq %rax, 184(%r10)
  29185. movq %rcx, 184(%r11)
  29186. movq 192(%r10), %rax
  29187. movq 192(%r11), %rcx
  29188. andq %r14, %rax
  29189. andq %r13, %rcx
  29190. movq %rax, 192(%r10)
  29191. movq %rcx, 192(%r11)
  29192. movq 200(%r10), %rax
  29193. movq 200(%r11), %rcx
  29194. andq %r14, %rax
  29195. andq %r13, %rcx
  29196. movq %rax, 200(%r10)
  29197. movq %rcx, 200(%r11)
  29198. movq 208(%r10), %rax
  29199. movq 208(%r11), %rcx
  29200. andq %r14, %rax
  29201. andq %r13, %rcx
  29202. movq %rax, 208(%r10)
  29203. movq %rcx, 208(%r11)
  29204. movq 216(%r10), %rax
  29205. movq 216(%r11), %rcx
  29206. andq %r14, %rax
  29207. andq %r13, %rcx
  29208. movq %rax, 216(%r10)
  29209. movq %rcx, 216(%r11)
  29210. movq 224(%r10), %rax
  29211. movq 224(%r11), %rcx
  29212. andq %r14, %rax
  29213. andq %r13, %rcx
  29214. movq %rax, 224(%r10)
  29215. movq %rcx, 224(%r11)
  29216. movq 232(%r10), %rax
  29217. movq 232(%r11), %rcx
  29218. andq %r14, %rax
  29219. andq %r13, %rcx
  29220. movq %rax, 232(%r10)
  29221. movq %rcx, 232(%r11)
  29222. movq 240(%r10), %rax
  29223. movq 240(%r11), %rcx
  29224. andq %r14, %rax
  29225. andq %r13, %rcx
  29226. movq %rax, 240(%r10)
  29227. movq %rcx, 240(%r11)
  29228. movq 248(%r10), %rax
  29229. movq 248(%r11), %rcx
  29230. andq %r14, %rax
  29231. andq %r13, %rcx
  29232. movq %rax, 248(%r10)
  29233. movq %rcx, 248(%r11)
  29234. movq (%r10), %rax
  29235. addq (%r11), %rax
  29236. movq 8(%r10), %rcx
  29237. movq %rax, (%r15)
  29238. adcq 8(%r11), %rcx
  29239. movq 16(%r10), %r8
  29240. movq %rcx, 8(%r15)
  29241. adcq 16(%r11), %r8
  29242. movq 24(%r10), %rax
  29243. movq %r8, 16(%r15)
  29244. adcq 24(%r11), %rax
  29245. movq 32(%r10), %rcx
  29246. movq %rax, 24(%r15)
  29247. adcq 32(%r11), %rcx
  29248. movq 40(%r10), %r8
  29249. movq %rcx, 32(%r15)
  29250. adcq 40(%r11), %r8
  29251. movq 48(%r10), %rax
  29252. movq %r8, 40(%r15)
  29253. adcq 48(%r11), %rax
  29254. movq 56(%r10), %rcx
  29255. movq %rax, 48(%r15)
  29256. adcq 56(%r11), %rcx
  29257. movq 64(%r10), %r8
  29258. movq %rcx, 56(%r15)
  29259. adcq 64(%r11), %r8
  29260. movq 72(%r10), %rax
  29261. movq %r8, 64(%r15)
  29262. adcq 72(%r11), %rax
  29263. movq 80(%r10), %rcx
  29264. movq %rax, 72(%r15)
  29265. adcq 80(%r11), %rcx
  29266. movq 88(%r10), %r8
  29267. movq %rcx, 80(%r15)
  29268. adcq 88(%r11), %r8
  29269. movq 96(%r10), %rax
  29270. movq %r8, 88(%r15)
  29271. adcq 96(%r11), %rax
  29272. movq 104(%r10), %rcx
  29273. movq %rax, 96(%r15)
  29274. adcq 104(%r11), %rcx
  29275. movq 112(%r10), %r8
  29276. movq %rcx, 104(%r15)
  29277. adcq 112(%r11), %r8
  29278. movq 120(%r10), %rax
  29279. movq %r8, 112(%r15)
  29280. adcq 120(%r11), %rax
  29281. movq 128(%r10), %rcx
  29282. movq %rax, 120(%r15)
  29283. adcq 128(%r11), %rcx
  29284. movq 136(%r10), %r8
  29285. movq %rcx, 128(%r15)
  29286. adcq 136(%r11), %r8
  29287. movq 144(%r10), %rax
  29288. movq %r8, 136(%r15)
  29289. adcq 144(%r11), %rax
  29290. movq 152(%r10), %rcx
  29291. movq %rax, 144(%r15)
  29292. adcq 152(%r11), %rcx
  29293. movq 160(%r10), %r8
  29294. movq %rcx, 152(%r15)
  29295. adcq 160(%r11), %r8
  29296. movq 168(%r10), %rax
  29297. movq %r8, 160(%r15)
  29298. adcq 168(%r11), %rax
  29299. movq 176(%r10), %rcx
  29300. movq %rax, 168(%r15)
  29301. adcq 176(%r11), %rcx
  29302. movq 184(%r10), %r8
  29303. movq %rcx, 176(%r15)
  29304. adcq 184(%r11), %r8
  29305. movq 192(%r10), %rax
  29306. movq %r8, 184(%r15)
  29307. adcq 192(%r11), %rax
  29308. movq 200(%r10), %rcx
  29309. movq %rax, 192(%r15)
  29310. adcq 200(%r11), %rcx
  29311. movq 208(%r10), %r8
  29312. movq %rcx, 200(%r15)
  29313. adcq 208(%r11), %r8
  29314. movq 216(%r10), %rax
  29315. movq %r8, 208(%r15)
  29316. adcq 216(%r11), %rax
  29317. movq 224(%r10), %rcx
  29318. movq %rax, 216(%r15)
  29319. adcq 224(%r11), %rcx
  29320. movq 232(%r10), %r8
  29321. movq %rcx, 224(%r15)
  29322. adcq 232(%r11), %r8
  29323. movq 240(%r10), %rax
  29324. movq %r8, 232(%r15)
  29325. adcq 240(%r11), %rax
  29326. movq 248(%r10), %rcx
  29327. movq %rax, 240(%r15)
  29328. adcq 248(%r11), %rcx
  29329. movq %rcx, 248(%r15)
  29330. adcq $0x00, %r9
  29331. leaq 512(%rsp), %r11
  29332. movq %rsp, %r10
  29333. movq (%r10), %rax
  29334. subq (%r11), %rax
  29335. movq 8(%r10), %rcx
  29336. movq %rax, (%r10)
  29337. sbbq 8(%r11), %rcx
  29338. movq 16(%r10), %r8
  29339. movq %rcx, 8(%r10)
  29340. sbbq 16(%r11), %r8
  29341. movq 24(%r10), %rax
  29342. movq %r8, 16(%r10)
  29343. sbbq 24(%r11), %rax
  29344. movq 32(%r10), %rcx
  29345. movq %rax, 24(%r10)
  29346. sbbq 32(%r11), %rcx
  29347. movq 40(%r10), %r8
  29348. movq %rcx, 32(%r10)
  29349. sbbq 40(%r11), %r8
  29350. movq 48(%r10), %rax
  29351. movq %r8, 40(%r10)
  29352. sbbq 48(%r11), %rax
  29353. movq 56(%r10), %rcx
  29354. movq %rax, 48(%r10)
  29355. sbbq 56(%r11), %rcx
  29356. movq 64(%r10), %r8
  29357. movq %rcx, 56(%r10)
  29358. sbbq 64(%r11), %r8
  29359. movq 72(%r10), %rax
  29360. movq %r8, 64(%r10)
  29361. sbbq 72(%r11), %rax
  29362. movq 80(%r10), %rcx
  29363. movq %rax, 72(%r10)
  29364. sbbq 80(%r11), %rcx
  29365. movq 88(%r10), %r8
  29366. movq %rcx, 80(%r10)
  29367. sbbq 88(%r11), %r8
  29368. movq 96(%r10), %rax
  29369. movq %r8, 88(%r10)
  29370. sbbq 96(%r11), %rax
  29371. movq 104(%r10), %rcx
  29372. movq %rax, 96(%r10)
  29373. sbbq 104(%r11), %rcx
  29374. movq 112(%r10), %r8
  29375. movq %rcx, 104(%r10)
  29376. sbbq 112(%r11), %r8
  29377. movq 120(%r10), %rax
  29378. movq %r8, 112(%r10)
  29379. sbbq 120(%r11), %rax
  29380. movq 128(%r10), %rcx
  29381. movq %rax, 120(%r10)
  29382. sbbq 128(%r11), %rcx
  29383. movq 136(%r10), %r8
  29384. movq %rcx, 128(%r10)
  29385. sbbq 136(%r11), %r8
  29386. movq 144(%r10), %rax
  29387. movq %r8, 136(%r10)
  29388. sbbq 144(%r11), %rax
  29389. movq 152(%r10), %rcx
  29390. movq %rax, 144(%r10)
  29391. sbbq 152(%r11), %rcx
  29392. movq 160(%r10), %r8
  29393. movq %rcx, 152(%r10)
  29394. sbbq 160(%r11), %r8
  29395. movq 168(%r10), %rax
  29396. movq %r8, 160(%r10)
  29397. sbbq 168(%r11), %rax
  29398. movq 176(%r10), %rcx
  29399. movq %rax, 168(%r10)
  29400. sbbq 176(%r11), %rcx
  29401. movq 184(%r10), %r8
  29402. movq %rcx, 176(%r10)
  29403. sbbq 184(%r11), %r8
  29404. movq 192(%r10), %rax
  29405. movq %r8, 184(%r10)
  29406. sbbq 192(%r11), %rax
  29407. movq 200(%r10), %rcx
  29408. movq %rax, 192(%r10)
  29409. sbbq 200(%r11), %rcx
  29410. movq 208(%r10), %r8
  29411. movq %rcx, 200(%r10)
  29412. sbbq 208(%r11), %r8
  29413. movq 216(%r10), %rax
  29414. movq %r8, 208(%r10)
  29415. sbbq 216(%r11), %rax
  29416. movq 224(%r10), %rcx
  29417. movq %rax, 216(%r10)
  29418. sbbq 224(%r11), %rcx
  29419. movq 232(%r10), %r8
  29420. movq %rcx, 224(%r10)
  29421. sbbq 232(%r11), %r8
  29422. movq 240(%r10), %rax
  29423. movq %r8, 232(%r10)
  29424. sbbq 240(%r11), %rax
  29425. movq 248(%r10), %rcx
  29426. movq %rax, 240(%r10)
  29427. sbbq 248(%r11), %rcx
  29428. movq 256(%r10), %r8
  29429. movq %rcx, 248(%r10)
  29430. sbbq 256(%r11), %r8
  29431. movq 264(%r10), %rax
  29432. movq %r8, 256(%r10)
  29433. sbbq 264(%r11), %rax
  29434. movq 272(%r10), %rcx
  29435. movq %rax, 264(%r10)
  29436. sbbq 272(%r11), %rcx
  29437. movq 280(%r10), %r8
  29438. movq %rcx, 272(%r10)
  29439. sbbq 280(%r11), %r8
  29440. movq 288(%r10), %rax
  29441. movq %r8, 280(%r10)
  29442. sbbq 288(%r11), %rax
  29443. movq 296(%r10), %rcx
  29444. movq %rax, 288(%r10)
  29445. sbbq 296(%r11), %rcx
  29446. movq 304(%r10), %r8
  29447. movq %rcx, 296(%r10)
  29448. sbbq 304(%r11), %r8
  29449. movq 312(%r10), %rax
  29450. movq %r8, 304(%r10)
  29451. sbbq 312(%r11), %rax
  29452. movq 320(%r10), %rcx
  29453. movq %rax, 312(%r10)
  29454. sbbq 320(%r11), %rcx
  29455. movq 328(%r10), %r8
  29456. movq %rcx, 320(%r10)
  29457. sbbq 328(%r11), %r8
  29458. movq 336(%r10), %rax
  29459. movq %r8, 328(%r10)
  29460. sbbq 336(%r11), %rax
  29461. movq 344(%r10), %rcx
  29462. movq %rax, 336(%r10)
  29463. sbbq 344(%r11), %rcx
  29464. movq 352(%r10), %r8
  29465. movq %rcx, 344(%r10)
  29466. sbbq 352(%r11), %r8
  29467. movq 360(%r10), %rax
  29468. movq %r8, 352(%r10)
  29469. sbbq 360(%r11), %rax
  29470. movq 368(%r10), %rcx
  29471. movq %rax, 360(%r10)
  29472. sbbq 368(%r11), %rcx
  29473. movq 376(%r10), %r8
  29474. movq %rcx, 368(%r10)
  29475. sbbq 376(%r11), %r8
  29476. movq 384(%r10), %rax
  29477. movq %r8, 376(%r10)
  29478. sbbq 384(%r11), %rax
  29479. movq 392(%r10), %rcx
  29480. movq %rax, 384(%r10)
  29481. sbbq 392(%r11), %rcx
  29482. movq 400(%r10), %r8
  29483. movq %rcx, 392(%r10)
  29484. sbbq 400(%r11), %r8
  29485. movq 408(%r10), %rax
  29486. movq %r8, 400(%r10)
  29487. sbbq 408(%r11), %rax
  29488. movq 416(%r10), %rcx
  29489. movq %rax, 408(%r10)
  29490. sbbq 416(%r11), %rcx
  29491. movq 424(%r10), %r8
  29492. movq %rcx, 416(%r10)
  29493. sbbq 424(%r11), %r8
  29494. movq 432(%r10), %rax
  29495. movq %r8, 424(%r10)
  29496. sbbq 432(%r11), %rax
  29497. movq 440(%r10), %rcx
  29498. movq %rax, 432(%r10)
  29499. sbbq 440(%r11), %rcx
  29500. movq 448(%r10), %r8
  29501. movq %rcx, 440(%r10)
  29502. sbbq 448(%r11), %r8
  29503. movq 456(%r10), %rax
  29504. movq %r8, 448(%r10)
  29505. sbbq 456(%r11), %rax
  29506. movq 464(%r10), %rcx
  29507. movq %rax, 456(%r10)
  29508. sbbq 464(%r11), %rcx
  29509. movq 472(%r10), %r8
  29510. movq %rcx, 464(%r10)
  29511. sbbq 472(%r11), %r8
  29512. movq 480(%r10), %rax
  29513. movq %r8, 472(%r10)
  29514. sbbq 480(%r11), %rax
  29515. movq 488(%r10), %rcx
  29516. movq %rax, 480(%r10)
  29517. sbbq 488(%r11), %rcx
  29518. movq 496(%r10), %r8
  29519. movq %rcx, 488(%r10)
  29520. sbbq 496(%r11), %r8
  29521. movq 504(%r10), %rax
  29522. movq %r8, 496(%r10)
  29523. sbbq 504(%r11), %rax
  29524. movq %rax, 504(%r10)
  29525. sbbq $0x00, %r9
  29526. movq (%r10), %rax
  29527. subq (%rdi), %rax
  29528. movq 8(%r10), %rcx
  29529. movq %rax, (%r10)
  29530. sbbq 8(%rdi), %rcx
  29531. movq 16(%r10), %r8
  29532. movq %rcx, 8(%r10)
  29533. sbbq 16(%rdi), %r8
  29534. movq 24(%r10), %rax
  29535. movq %r8, 16(%r10)
  29536. sbbq 24(%rdi), %rax
  29537. movq 32(%r10), %rcx
  29538. movq %rax, 24(%r10)
  29539. sbbq 32(%rdi), %rcx
  29540. movq 40(%r10), %r8
  29541. movq %rcx, 32(%r10)
  29542. sbbq 40(%rdi), %r8
  29543. movq 48(%r10), %rax
  29544. movq %r8, 40(%r10)
  29545. sbbq 48(%rdi), %rax
  29546. movq 56(%r10), %rcx
  29547. movq %rax, 48(%r10)
  29548. sbbq 56(%rdi), %rcx
  29549. movq 64(%r10), %r8
  29550. movq %rcx, 56(%r10)
  29551. sbbq 64(%rdi), %r8
  29552. movq 72(%r10), %rax
  29553. movq %r8, 64(%r10)
  29554. sbbq 72(%rdi), %rax
  29555. movq 80(%r10), %rcx
  29556. movq %rax, 72(%r10)
  29557. sbbq 80(%rdi), %rcx
  29558. movq 88(%r10), %r8
  29559. movq %rcx, 80(%r10)
  29560. sbbq 88(%rdi), %r8
  29561. movq 96(%r10), %rax
  29562. movq %r8, 88(%r10)
  29563. sbbq 96(%rdi), %rax
  29564. movq 104(%r10), %rcx
  29565. movq %rax, 96(%r10)
  29566. sbbq 104(%rdi), %rcx
  29567. movq 112(%r10), %r8
  29568. movq %rcx, 104(%r10)
  29569. sbbq 112(%rdi), %r8
  29570. movq 120(%r10), %rax
  29571. movq %r8, 112(%r10)
  29572. sbbq 120(%rdi), %rax
  29573. movq 128(%r10), %rcx
  29574. movq %rax, 120(%r10)
  29575. sbbq 128(%rdi), %rcx
  29576. movq 136(%r10), %r8
  29577. movq %rcx, 128(%r10)
  29578. sbbq 136(%rdi), %r8
  29579. movq 144(%r10), %rax
  29580. movq %r8, 136(%r10)
  29581. sbbq 144(%rdi), %rax
  29582. movq 152(%r10), %rcx
  29583. movq %rax, 144(%r10)
  29584. sbbq 152(%rdi), %rcx
  29585. movq 160(%r10), %r8
  29586. movq %rcx, 152(%r10)
  29587. sbbq 160(%rdi), %r8
  29588. movq 168(%r10), %rax
  29589. movq %r8, 160(%r10)
  29590. sbbq 168(%rdi), %rax
  29591. movq 176(%r10), %rcx
  29592. movq %rax, 168(%r10)
  29593. sbbq 176(%rdi), %rcx
  29594. movq 184(%r10), %r8
  29595. movq %rcx, 176(%r10)
  29596. sbbq 184(%rdi), %r8
  29597. movq 192(%r10), %rax
  29598. movq %r8, 184(%r10)
  29599. sbbq 192(%rdi), %rax
  29600. movq 200(%r10), %rcx
  29601. movq %rax, 192(%r10)
  29602. sbbq 200(%rdi), %rcx
  29603. movq 208(%r10), %r8
  29604. movq %rcx, 200(%r10)
  29605. sbbq 208(%rdi), %r8
  29606. movq 216(%r10), %rax
  29607. movq %r8, 208(%r10)
  29608. sbbq 216(%rdi), %rax
  29609. movq 224(%r10), %rcx
  29610. movq %rax, 216(%r10)
  29611. sbbq 224(%rdi), %rcx
  29612. movq 232(%r10), %r8
  29613. movq %rcx, 224(%r10)
  29614. sbbq 232(%rdi), %r8
  29615. movq 240(%r10), %rax
  29616. movq %r8, 232(%r10)
  29617. sbbq 240(%rdi), %rax
  29618. movq 248(%r10), %rcx
  29619. movq %rax, 240(%r10)
  29620. sbbq 248(%rdi), %rcx
  29621. movq 256(%r10), %r8
  29622. movq %rcx, 248(%r10)
  29623. sbbq 256(%rdi), %r8
  29624. movq 264(%r10), %rax
  29625. movq %r8, 256(%r10)
  29626. sbbq 264(%rdi), %rax
  29627. movq 272(%r10), %rcx
  29628. movq %rax, 264(%r10)
  29629. sbbq 272(%rdi), %rcx
  29630. movq 280(%r10), %r8
  29631. movq %rcx, 272(%r10)
  29632. sbbq 280(%rdi), %r8
  29633. movq 288(%r10), %rax
  29634. movq %r8, 280(%r10)
  29635. sbbq 288(%rdi), %rax
  29636. movq 296(%r10), %rcx
  29637. movq %rax, 288(%r10)
  29638. sbbq 296(%rdi), %rcx
  29639. movq 304(%r10), %r8
  29640. movq %rcx, 296(%r10)
  29641. sbbq 304(%rdi), %r8
  29642. movq 312(%r10), %rax
  29643. movq %r8, 304(%r10)
  29644. sbbq 312(%rdi), %rax
  29645. movq 320(%r10), %rcx
  29646. movq %rax, 312(%r10)
  29647. sbbq 320(%rdi), %rcx
  29648. movq 328(%r10), %r8
  29649. movq %rcx, 320(%r10)
  29650. sbbq 328(%rdi), %r8
  29651. movq 336(%r10), %rax
  29652. movq %r8, 328(%r10)
  29653. sbbq 336(%rdi), %rax
  29654. movq 344(%r10), %rcx
  29655. movq %rax, 336(%r10)
  29656. sbbq 344(%rdi), %rcx
  29657. movq 352(%r10), %r8
  29658. movq %rcx, 344(%r10)
  29659. sbbq 352(%rdi), %r8
  29660. movq 360(%r10), %rax
  29661. movq %r8, 352(%r10)
  29662. sbbq 360(%rdi), %rax
  29663. movq 368(%r10), %rcx
  29664. movq %rax, 360(%r10)
  29665. sbbq 368(%rdi), %rcx
  29666. movq 376(%r10), %r8
  29667. movq %rcx, 368(%r10)
  29668. sbbq 376(%rdi), %r8
  29669. movq 384(%r10), %rax
  29670. movq %r8, 376(%r10)
  29671. sbbq 384(%rdi), %rax
  29672. movq 392(%r10), %rcx
  29673. movq %rax, 384(%r10)
  29674. sbbq 392(%rdi), %rcx
  29675. movq 400(%r10), %r8
  29676. movq %rcx, 392(%r10)
  29677. sbbq 400(%rdi), %r8
  29678. movq 408(%r10), %rax
  29679. movq %r8, 400(%r10)
  29680. sbbq 408(%rdi), %rax
  29681. movq 416(%r10), %rcx
  29682. movq %rax, 408(%r10)
  29683. sbbq 416(%rdi), %rcx
  29684. movq 424(%r10), %r8
  29685. movq %rcx, 416(%r10)
  29686. sbbq 424(%rdi), %r8
  29687. movq 432(%r10), %rax
  29688. movq %r8, 424(%r10)
  29689. sbbq 432(%rdi), %rax
  29690. movq 440(%r10), %rcx
  29691. movq %rax, 432(%r10)
  29692. sbbq 440(%rdi), %rcx
  29693. movq 448(%r10), %r8
  29694. movq %rcx, 440(%r10)
  29695. sbbq 448(%rdi), %r8
  29696. movq 456(%r10), %rax
  29697. movq %r8, 448(%r10)
  29698. sbbq 456(%rdi), %rax
  29699. movq 464(%r10), %rcx
  29700. movq %rax, 456(%r10)
  29701. sbbq 464(%rdi), %rcx
  29702. movq 472(%r10), %r8
  29703. movq %rcx, 464(%r10)
  29704. sbbq 472(%rdi), %r8
  29705. movq 480(%r10), %rax
  29706. movq %r8, 472(%r10)
  29707. sbbq 480(%rdi), %rax
  29708. movq 488(%r10), %rcx
  29709. movq %rax, 480(%r10)
  29710. sbbq 488(%rdi), %rcx
  29711. movq 496(%r10), %r8
  29712. movq %rcx, 488(%r10)
  29713. sbbq 496(%rdi), %r8
  29714. movq 504(%r10), %rax
  29715. movq %r8, 496(%r10)
  29716. sbbq 504(%rdi), %rax
  29717. movq %rax, 504(%r10)
  29718. sbbq $0x00, %r9
  29719. subq $0x100, %r15
  29720. # Add
  29721. movq (%r15), %rax
  29722. addq (%r10), %rax
  29723. movq 8(%r15), %rcx
  29724. movq %rax, (%r15)
  29725. adcq 8(%r10), %rcx
  29726. movq 16(%r15), %r8
  29727. movq %rcx, 8(%r15)
  29728. adcq 16(%r10), %r8
  29729. movq 24(%r15), %rax
  29730. movq %r8, 16(%r15)
  29731. adcq 24(%r10), %rax
  29732. movq 32(%r15), %rcx
  29733. movq %rax, 24(%r15)
  29734. adcq 32(%r10), %rcx
  29735. movq 40(%r15), %r8
  29736. movq %rcx, 32(%r15)
  29737. adcq 40(%r10), %r8
  29738. movq 48(%r15), %rax
  29739. movq %r8, 40(%r15)
  29740. adcq 48(%r10), %rax
  29741. movq 56(%r15), %rcx
  29742. movq %rax, 48(%r15)
  29743. adcq 56(%r10), %rcx
  29744. movq 64(%r15), %r8
  29745. movq %rcx, 56(%r15)
  29746. adcq 64(%r10), %r8
  29747. movq 72(%r15), %rax
  29748. movq %r8, 64(%r15)
  29749. adcq 72(%r10), %rax
  29750. movq 80(%r15), %rcx
  29751. movq %rax, 72(%r15)
  29752. adcq 80(%r10), %rcx
  29753. movq 88(%r15), %r8
  29754. movq %rcx, 80(%r15)
  29755. adcq 88(%r10), %r8
  29756. movq 96(%r15), %rax
  29757. movq %r8, 88(%r15)
  29758. adcq 96(%r10), %rax
  29759. movq 104(%r15), %rcx
  29760. movq %rax, 96(%r15)
  29761. adcq 104(%r10), %rcx
  29762. movq 112(%r15), %r8
  29763. movq %rcx, 104(%r15)
  29764. adcq 112(%r10), %r8
  29765. movq 120(%r15), %rax
  29766. movq %r8, 112(%r15)
  29767. adcq 120(%r10), %rax
  29768. movq 128(%r15), %rcx
  29769. movq %rax, 120(%r15)
  29770. adcq 128(%r10), %rcx
  29771. movq 136(%r15), %r8
  29772. movq %rcx, 128(%r15)
  29773. adcq 136(%r10), %r8
  29774. movq 144(%r15), %rax
  29775. movq %r8, 136(%r15)
  29776. adcq 144(%r10), %rax
  29777. movq 152(%r15), %rcx
  29778. movq %rax, 144(%r15)
  29779. adcq 152(%r10), %rcx
  29780. movq 160(%r15), %r8
  29781. movq %rcx, 152(%r15)
  29782. adcq 160(%r10), %r8
  29783. movq 168(%r15), %rax
  29784. movq %r8, 160(%r15)
  29785. adcq 168(%r10), %rax
  29786. movq 176(%r15), %rcx
  29787. movq %rax, 168(%r15)
  29788. adcq 176(%r10), %rcx
  29789. movq 184(%r15), %r8
  29790. movq %rcx, 176(%r15)
  29791. adcq 184(%r10), %r8
  29792. movq 192(%r15), %rax
  29793. movq %r8, 184(%r15)
  29794. adcq 192(%r10), %rax
  29795. movq 200(%r15), %rcx
  29796. movq %rax, 192(%r15)
  29797. adcq 200(%r10), %rcx
  29798. movq 208(%r15), %r8
  29799. movq %rcx, 200(%r15)
  29800. adcq 208(%r10), %r8
  29801. movq 216(%r15), %rax
  29802. movq %r8, 208(%r15)
  29803. adcq 216(%r10), %rax
  29804. movq 224(%r15), %rcx
  29805. movq %rax, 216(%r15)
  29806. adcq 224(%r10), %rcx
  29807. movq 232(%r15), %r8
  29808. movq %rcx, 224(%r15)
  29809. adcq 232(%r10), %r8
  29810. movq 240(%r15), %rax
  29811. movq %r8, 232(%r15)
  29812. adcq 240(%r10), %rax
  29813. movq 248(%r15), %rcx
  29814. movq %rax, 240(%r15)
  29815. adcq 248(%r10), %rcx
  29816. movq 256(%r15), %r8
  29817. movq %rcx, 248(%r15)
  29818. adcq 256(%r10), %r8
  29819. movq 264(%r15), %rax
  29820. movq %r8, 256(%r15)
  29821. adcq 264(%r10), %rax
  29822. movq 272(%r15), %rcx
  29823. movq %rax, 264(%r15)
  29824. adcq 272(%r10), %rcx
  29825. movq 280(%r15), %r8
  29826. movq %rcx, 272(%r15)
  29827. adcq 280(%r10), %r8
  29828. movq 288(%r15), %rax
  29829. movq %r8, 280(%r15)
  29830. adcq 288(%r10), %rax
  29831. movq 296(%r15), %rcx
  29832. movq %rax, 288(%r15)
  29833. adcq 296(%r10), %rcx
  29834. movq 304(%r15), %r8
  29835. movq %rcx, 296(%r15)
  29836. adcq 304(%r10), %r8
  29837. movq 312(%r15), %rax
  29838. movq %r8, 304(%r15)
  29839. adcq 312(%r10), %rax
  29840. movq 320(%r15), %rcx
  29841. movq %rax, 312(%r15)
  29842. adcq 320(%r10), %rcx
  29843. movq 328(%r15), %r8
  29844. movq %rcx, 320(%r15)
  29845. adcq 328(%r10), %r8
  29846. movq 336(%r15), %rax
  29847. movq %r8, 328(%r15)
  29848. adcq 336(%r10), %rax
  29849. movq 344(%r15), %rcx
  29850. movq %rax, 336(%r15)
  29851. adcq 344(%r10), %rcx
  29852. movq 352(%r15), %r8
  29853. movq %rcx, 344(%r15)
  29854. adcq 352(%r10), %r8
  29855. movq 360(%r15), %rax
  29856. movq %r8, 352(%r15)
  29857. adcq 360(%r10), %rax
  29858. movq 368(%r15), %rcx
  29859. movq %rax, 360(%r15)
  29860. adcq 368(%r10), %rcx
  29861. movq 376(%r15), %r8
  29862. movq %rcx, 368(%r15)
  29863. adcq 376(%r10), %r8
  29864. movq 384(%r15), %rax
  29865. movq %r8, 376(%r15)
  29866. adcq 384(%r10), %rax
  29867. movq 392(%r15), %rcx
  29868. movq %rax, 384(%r15)
  29869. adcq 392(%r10), %rcx
  29870. movq 400(%r15), %r8
  29871. movq %rcx, 392(%r15)
  29872. adcq 400(%r10), %r8
  29873. movq 408(%r15), %rax
  29874. movq %r8, 400(%r15)
  29875. adcq 408(%r10), %rax
  29876. movq 416(%r15), %rcx
  29877. movq %rax, 408(%r15)
  29878. adcq 416(%r10), %rcx
  29879. movq 424(%r15), %r8
  29880. movq %rcx, 416(%r15)
  29881. adcq 424(%r10), %r8
  29882. movq 432(%r15), %rax
  29883. movq %r8, 424(%r15)
  29884. adcq 432(%r10), %rax
  29885. movq 440(%r15), %rcx
  29886. movq %rax, 432(%r15)
  29887. adcq 440(%r10), %rcx
  29888. movq 448(%r15), %r8
  29889. movq %rcx, 440(%r15)
  29890. adcq 448(%r10), %r8
  29891. movq 456(%r15), %rax
  29892. movq %r8, 448(%r15)
  29893. adcq 456(%r10), %rax
  29894. movq 464(%r15), %rcx
  29895. movq %rax, 456(%r15)
  29896. adcq 464(%r10), %rcx
  29897. movq 472(%r15), %r8
  29898. movq %rcx, 464(%r15)
  29899. adcq 472(%r10), %r8
  29900. movq 480(%r15), %rax
  29901. movq %r8, 472(%r15)
  29902. adcq 480(%r10), %rax
  29903. movq 488(%r15), %rcx
  29904. movq %rax, 480(%r15)
  29905. adcq 488(%r10), %rcx
  29906. movq 496(%r15), %r8
  29907. movq %rcx, 488(%r15)
  29908. adcq 496(%r10), %r8
  29909. movq 504(%r15), %rax
  29910. movq %r8, 496(%r15)
  29911. adcq 504(%r10), %rax
  29912. movq %rax, 504(%r15)
  29913. adcq $0x00, %r9
  29914. movq %r9, 768(%rdi)
  29915. addq $0x100, %r15
  29916. # Add
  29917. movq (%r15), %rax
  29918. xorq %r9, %r9
  29919. addq (%r11), %rax
  29920. movq 8(%r15), %rcx
  29921. movq %rax, (%r15)
  29922. adcq 8(%r11), %rcx
  29923. movq 16(%r15), %r8
  29924. movq %rcx, 8(%r15)
  29925. adcq 16(%r11), %r8
  29926. movq 24(%r15), %rax
  29927. movq %r8, 16(%r15)
  29928. adcq 24(%r11), %rax
  29929. movq 32(%r15), %rcx
  29930. movq %rax, 24(%r15)
  29931. adcq 32(%r11), %rcx
  29932. movq 40(%r15), %r8
  29933. movq %rcx, 32(%r15)
  29934. adcq 40(%r11), %r8
  29935. movq 48(%r15), %rax
  29936. movq %r8, 40(%r15)
  29937. adcq 48(%r11), %rax
  29938. movq 56(%r15), %rcx
  29939. movq %rax, 48(%r15)
  29940. adcq 56(%r11), %rcx
  29941. movq 64(%r15), %r8
  29942. movq %rcx, 56(%r15)
  29943. adcq 64(%r11), %r8
  29944. movq 72(%r15), %rax
  29945. movq %r8, 64(%r15)
  29946. adcq 72(%r11), %rax
  29947. movq 80(%r15), %rcx
  29948. movq %rax, 72(%r15)
  29949. adcq 80(%r11), %rcx
  29950. movq 88(%r15), %r8
  29951. movq %rcx, 80(%r15)
  29952. adcq 88(%r11), %r8
  29953. movq 96(%r15), %rax
  29954. movq %r8, 88(%r15)
  29955. adcq 96(%r11), %rax
  29956. movq 104(%r15), %rcx
  29957. movq %rax, 96(%r15)
  29958. adcq 104(%r11), %rcx
  29959. movq 112(%r15), %r8
  29960. movq %rcx, 104(%r15)
  29961. adcq 112(%r11), %r8
  29962. movq 120(%r15), %rax
  29963. movq %r8, 112(%r15)
  29964. adcq 120(%r11), %rax
  29965. movq 128(%r15), %rcx
  29966. movq %rax, 120(%r15)
  29967. adcq 128(%r11), %rcx
  29968. movq 136(%r15), %r8
  29969. movq %rcx, 128(%r15)
  29970. adcq 136(%r11), %r8
  29971. movq 144(%r15), %rax
  29972. movq %r8, 136(%r15)
  29973. adcq 144(%r11), %rax
  29974. movq 152(%r15), %rcx
  29975. movq %rax, 144(%r15)
  29976. adcq 152(%r11), %rcx
  29977. movq 160(%r15), %r8
  29978. movq %rcx, 152(%r15)
  29979. adcq 160(%r11), %r8
  29980. movq 168(%r15), %rax
  29981. movq %r8, 160(%r15)
  29982. adcq 168(%r11), %rax
  29983. movq 176(%r15), %rcx
  29984. movq %rax, 168(%r15)
  29985. adcq 176(%r11), %rcx
  29986. movq 184(%r15), %r8
  29987. movq %rcx, 176(%r15)
  29988. adcq 184(%r11), %r8
  29989. movq 192(%r15), %rax
  29990. movq %r8, 184(%r15)
  29991. adcq 192(%r11), %rax
  29992. movq 200(%r15), %rcx
  29993. movq %rax, 192(%r15)
  29994. adcq 200(%r11), %rcx
  29995. movq 208(%r15), %r8
  29996. movq %rcx, 200(%r15)
  29997. adcq 208(%r11), %r8
  29998. movq 216(%r15), %rax
  29999. movq %r8, 208(%r15)
  30000. adcq 216(%r11), %rax
  30001. movq 224(%r15), %rcx
  30002. movq %rax, 216(%r15)
  30003. adcq 224(%r11), %rcx
  30004. movq 232(%r15), %r8
  30005. movq %rcx, 224(%r15)
  30006. adcq 232(%r11), %r8
  30007. movq 240(%r15), %rax
  30008. movq %r8, 232(%r15)
  30009. adcq 240(%r11), %rax
  30010. movq 248(%r15), %rcx
  30011. movq %rax, 240(%r15)
  30012. adcq 248(%r11), %rcx
  30013. movq 256(%r15), %r8
  30014. movq %rcx, 248(%r15)
  30015. adcq 256(%r11), %r8
  30016. movq %r8, 256(%r15)
  30017. adcq $0x00, %r9
  30018. # Add to zero
  30019. movq 264(%r11), %rax
  30020. adcq $0x00, %rax
  30021. movq 272(%r11), %rcx
  30022. movq %rax, 264(%r15)
  30023. adcq $0x00, %rcx
  30024. movq 280(%r11), %r8
  30025. movq %rcx, 272(%r15)
  30026. adcq $0x00, %r8
  30027. movq 288(%r11), %rax
  30028. movq %r8, 280(%r15)
  30029. adcq $0x00, %rax
  30030. movq 296(%r11), %rcx
  30031. movq %rax, 288(%r15)
  30032. adcq $0x00, %rcx
  30033. movq 304(%r11), %r8
  30034. movq %rcx, 296(%r15)
  30035. adcq $0x00, %r8
  30036. movq 312(%r11), %rax
  30037. movq %r8, 304(%r15)
  30038. adcq $0x00, %rax
  30039. movq 320(%r11), %rcx
  30040. movq %rax, 312(%r15)
  30041. adcq $0x00, %rcx
  30042. movq 328(%r11), %r8
  30043. movq %rcx, 320(%r15)
  30044. adcq $0x00, %r8
  30045. movq 336(%r11), %rax
  30046. movq %r8, 328(%r15)
  30047. adcq $0x00, %rax
  30048. movq 344(%r11), %rcx
  30049. movq %rax, 336(%r15)
  30050. adcq $0x00, %rcx
  30051. movq 352(%r11), %r8
  30052. movq %rcx, 344(%r15)
  30053. adcq $0x00, %r8
  30054. movq 360(%r11), %rax
  30055. movq %r8, 352(%r15)
  30056. adcq $0x00, %rax
  30057. movq 368(%r11), %rcx
  30058. movq %rax, 360(%r15)
  30059. adcq $0x00, %rcx
  30060. movq 376(%r11), %r8
  30061. movq %rcx, 368(%r15)
  30062. adcq $0x00, %r8
  30063. movq 384(%r11), %rax
  30064. movq %r8, 376(%r15)
  30065. adcq $0x00, %rax
  30066. movq 392(%r11), %rcx
  30067. movq %rax, 384(%r15)
  30068. adcq $0x00, %rcx
  30069. movq 400(%r11), %r8
  30070. movq %rcx, 392(%r15)
  30071. adcq $0x00, %r8
  30072. movq 408(%r11), %rax
  30073. movq %r8, 400(%r15)
  30074. adcq $0x00, %rax
  30075. movq 416(%r11), %rcx
  30076. movq %rax, 408(%r15)
  30077. adcq $0x00, %rcx
  30078. movq 424(%r11), %r8
  30079. movq %rcx, 416(%r15)
  30080. adcq $0x00, %r8
  30081. movq 432(%r11), %rax
  30082. movq %r8, 424(%r15)
  30083. adcq $0x00, %rax
  30084. movq 440(%r11), %rcx
  30085. movq %rax, 432(%r15)
  30086. adcq $0x00, %rcx
  30087. movq 448(%r11), %r8
  30088. movq %rcx, 440(%r15)
  30089. adcq $0x00, %r8
  30090. movq 456(%r11), %rax
  30091. movq %r8, 448(%r15)
  30092. adcq $0x00, %rax
  30093. movq 464(%r11), %rcx
  30094. movq %rax, 456(%r15)
  30095. adcq $0x00, %rcx
  30096. movq 472(%r11), %r8
  30097. movq %rcx, 464(%r15)
  30098. adcq $0x00, %r8
  30099. movq 480(%r11), %rax
  30100. movq %r8, 472(%r15)
  30101. adcq $0x00, %rax
  30102. movq 488(%r11), %rcx
  30103. movq %rax, 480(%r15)
  30104. adcq $0x00, %rcx
  30105. movq 496(%r11), %r8
  30106. movq %rcx, 488(%r15)
  30107. adcq $0x00, %r8
  30108. movq 504(%r11), %rax
  30109. movq %r8, 496(%r15)
  30110. adcq $0x00, %rax
  30111. movq %rax, 504(%r15)
  30112. addq $0x628, %rsp
  30113. popq %r15
  30114. popq %r14
  30115. popq %r13
  30116. popq %r12
  30117. repz retq
  30118. #ifndef __APPLE__
  30119. .size sp_4096_mul_64,.-sp_4096_mul_64
  30120. #endif /* __APPLE__ */
  /* Double a 2048-bit number: r = a + a.
   *
   * r  Destination, 32 limbs of 64 bits, addressed through %rdi.
   * a  Source, 32 limbs of 64 bits, addressed through %rsi.
   *
   * The carry out of the most significant limb (0 or 1) is left in %rax.
   * %rcx and %rdx are used as scratch; no stack is touched.
   */

/* One interior step of the carry chain.
 *
 * off   Byte offset of the limb being doubled.
 * cur   Scratch register that receives and doubles that limb.
 * prev  Register still holding the previous, already doubled limb.
 *
 * The previous result is stored only after the next limb has been loaded,
 * and neither movq alters the carry flag consumed by adcq.
 */
.macro SP_2048_DBL_32_STEP off, cur, prev
        movq    \off(%rsi), \cur
        movq    \prev, \off-8(%rdi)
        adcq    \cur, \cur
.endm

#ifndef __APPLE__
.text
.globl sp_2048_dbl_32
.type sp_2048_dbl_32,@function
.align 16
sp_2048_dbl_32:
#else
.section __TEXT,__text
.globl _sp_2048_dbl_32
.p2align 4
_sp_2048_dbl_32:
#endif /* __APPLE__ */
        /* Limb 0 opens the chain with a plain add. The carry accumulator
         * is zeroed before that add because xorq itself clears CF. */
        movq    (%rsi), %rdx
        xorq    %rax, %rax
        addq    %rdx, %rdx

        /* Limbs 1..31: the two scratch registers alternate so one can be
         * stored while the other is being doubled. */
        SP_2048_DBL_32_STEP 8, %rcx, %rdx
        SP_2048_DBL_32_STEP 16, %rdx, %rcx
        SP_2048_DBL_32_STEP 24, %rcx, %rdx
        SP_2048_DBL_32_STEP 32, %rdx, %rcx
        SP_2048_DBL_32_STEP 40, %rcx, %rdx
        SP_2048_DBL_32_STEP 48, %rdx, %rcx
        SP_2048_DBL_32_STEP 56, %rcx, %rdx
        SP_2048_DBL_32_STEP 64, %rdx, %rcx
        SP_2048_DBL_32_STEP 72, %rcx, %rdx
        SP_2048_DBL_32_STEP 80, %rdx, %rcx
        SP_2048_DBL_32_STEP 88, %rcx, %rdx
        SP_2048_DBL_32_STEP 96, %rdx, %rcx
        SP_2048_DBL_32_STEP 104, %rcx, %rdx
        SP_2048_DBL_32_STEP 112, %rdx, %rcx
        SP_2048_DBL_32_STEP 120, %rcx, %rdx
        SP_2048_DBL_32_STEP 128, %rdx, %rcx
        SP_2048_DBL_32_STEP 136, %rcx, %rdx
        SP_2048_DBL_32_STEP 144, %rdx, %rcx
        SP_2048_DBL_32_STEP 152, %rcx, %rdx
        SP_2048_DBL_32_STEP 160, %rdx, %rcx
        SP_2048_DBL_32_STEP 168, %rcx, %rdx
        SP_2048_DBL_32_STEP 176, %rdx, %rcx
        SP_2048_DBL_32_STEP 184, %rcx, %rdx
        SP_2048_DBL_32_STEP 192, %rdx, %rcx
        SP_2048_DBL_32_STEP 200, %rcx, %rdx
        SP_2048_DBL_32_STEP 208, %rdx, %rcx
        SP_2048_DBL_32_STEP 216, %rcx, %rdx
        SP_2048_DBL_32_STEP 224, %rdx, %rcx
        SP_2048_DBL_32_STEP 232, %rcx, %rdx
        SP_2048_DBL_32_STEP 240, %rdx, %rcx
        SP_2048_DBL_32_STEP 248, %rcx, %rdx

        /* Flush the top limb, then fold the final carry into the result. */
        movq    %rcx, 248(%rdi)
        adcq    $0x00, %rax
        repz retq
#ifndef __APPLE__
.size sp_2048_dbl_32,.-sp_2048_dbl_32
#endif /* __APPLE__ */
.purgem SP_2048_DBL_32_STEP
  30240. /* Square a and put result in r. (r = a * a)
  30241. *
  30242. * r A single precision integer.
  30243. * a A single precision integer.
  30244. */
  30245. #ifndef __APPLE__
  30246. .text
  30247. .globl sp_4096_sqr_64
  30248. .type sp_4096_sqr_64,@function
  30249. .align 16
  30250. sp_4096_sqr_64:
  30251. #else
  30252. .section __TEXT,__text
  30253. .globl _sp_4096_sqr_64
  30254. .p2align 4
  30255. _sp_4096_sqr_64:
  30256. #endif /* __APPLE__ */
  30257. subq $0x518, %rsp
  30258. movq %rdi, 1280(%rsp)
  30259. movq %rsi, 1288(%rsp)
  30260. leaq 1024(%rsp), %r8
  30261. leaq 256(%rsi), %r9
  30262. # Add
  30263. movq (%rsi), %rdx
  30264. xorq %rcx, %rcx
  30265. addq (%r9), %rdx
  30266. movq 8(%rsi), %rax
  30267. movq %rdx, (%r8)
  30268. adcq 8(%r9), %rax
  30269. movq 16(%rsi), %rdx
  30270. movq %rax, 8(%r8)
  30271. adcq 16(%r9), %rdx
  30272. movq 24(%rsi), %rax
  30273. movq %rdx, 16(%r8)
  30274. adcq 24(%r9), %rax
  30275. movq 32(%rsi), %rdx
  30276. movq %rax, 24(%r8)
  30277. adcq 32(%r9), %rdx
  30278. movq 40(%rsi), %rax
  30279. movq %rdx, 32(%r8)
  30280. adcq 40(%r9), %rax
  30281. movq 48(%rsi), %rdx
  30282. movq %rax, 40(%r8)
  30283. adcq 48(%r9), %rdx
  30284. movq 56(%rsi), %rax
  30285. movq %rdx, 48(%r8)
  30286. adcq 56(%r9), %rax
  30287. movq 64(%rsi), %rdx
  30288. movq %rax, 56(%r8)
  30289. adcq 64(%r9), %rdx
  30290. movq 72(%rsi), %rax
  30291. movq %rdx, 64(%r8)
  30292. adcq 72(%r9), %rax
  30293. movq 80(%rsi), %rdx
  30294. movq %rax, 72(%r8)
  30295. adcq 80(%r9), %rdx
  30296. movq 88(%rsi), %rax
  30297. movq %rdx, 80(%r8)
  30298. adcq 88(%r9), %rax
  30299. movq 96(%rsi), %rdx
  30300. movq %rax, 88(%r8)
  30301. adcq 96(%r9), %rdx
  30302. movq 104(%rsi), %rax
  30303. movq %rdx, 96(%r8)
  30304. adcq 104(%r9), %rax
  30305. movq 112(%rsi), %rdx
  30306. movq %rax, 104(%r8)
  30307. adcq 112(%r9), %rdx
  30308. movq 120(%rsi), %rax
  30309. movq %rdx, 112(%r8)
  30310. adcq 120(%r9), %rax
  30311. movq 128(%rsi), %rdx
  30312. movq %rax, 120(%r8)
  30313. adcq 128(%r9), %rdx
  30314. movq 136(%rsi), %rax
  30315. movq %rdx, 128(%r8)
  30316. adcq 136(%r9), %rax
  30317. movq 144(%rsi), %rdx
  30318. movq %rax, 136(%r8)
  30319. adcq 144(%r9), %rdx
  30320. movq 152(%rsi), %rax
  30321. movq %rdx, 144(%r8)
  30322. adcq 152(%r9), %rax
  30323. movq 160(%rsi), %rdx
  30324. movq %rax, 152(%r8)
  30325. adcq 160(%r9), %rdx
  30326. movq 168(%rsi), %rax
  30327. movq %rdx, 160(%r8)
  30328. adcq 168(%r9), %rax
  30329. movq 176(%rsi), %rdx
  30330. movq %rax, 168(%r8)
  30331. adcq 176(%r9), %rdx
  30332. movq 184(%rsi), %rax
  30333. movq %rdx, 176(%r8)
  30334. adcq 184(%r9), %rax
  30335. movq 192(%rsi), %rdx
  30336. movq %rax, 184(%r8)
  30337. adcq 192(%r9), %rdx
  30338. movq 200(%rsi), %rax
  30339. movq %rdx, 192(%r8)
  30340. adcq 200(%r9), %rax
  30341. movq 208(%rsi), %rdx
  30342. movq %rax, 200(%r8)
  30343. adcq 208(%r9), %rdx
  30344. movq 216(%rsi), %rax
  30345. movq %rdx, 208(%r8)
  30346. adcq 216(%r9), %rax
  30347. movq 224(%rsi), %rdx
  30348. movq %rax, 216(%r8)
  30349. adcq 224(%r9), %rdx
  30350. movq 232(%rsi), %rax
  30351. movq %rdx, 224(%r8)
  30352. adcq 232(%r9), %rax
  30353. movq 240(%rsi), %rdx
  30354. movq %rax, 232(%r8)
  30355. adcq 240(%r9), %rdx
  30356. movq 248(%rsi), %rax
  30357. movq %rdx, 240(%r8)
  30358. adcq 248(%r9), %rax
  30359. movq %rax, 248(%r8)
  30360. adcq $0x00, %rcx
  30361. movq %rcx, 1296(%rsp)
  30362. movq %r8, %rsi
  30363. movq %rsp, %rdi
  30364. #ifndef __APPLE__
  30365. callq sp_2048_sqr_32@plt
  30366. #else
  30367. callq _sp_2048_sqr_32
  30368. #endif /* __APPLE__ */
  30369. movq 1288(%rsp), %rsi
  30370. leaq 512(%rsp), %rdi
  30371. addq $0x100, %rsi
  30372. #ifndef __APPLE__
  30373. callq sp_2048_sqr_32@plt
  30374. #else
  30375. callq _sp_2048_sqr_32
  30376. #endif /* __APPLE__ */
  30377. movq 1288(%rsp), %rsi
  30378. movq 1280(%rsp), %rdi
  30379. #ifndef __APPLE__
  30380. callq sp_2048_sqr_32@plt
  30381. #else
  30382. callq _sp_2048_sqr_32
  30383. #endif /* __APPLE__ */
  30384. movq 1296(%rsp), %r10
  30385. leaq 1024(%rsp), %r8
  30386. movq %r10, %rcx
  30387. negq %r10
  30388. movq (%r8), %rdx
  30389. movq 8(%r8), %rax
  30390. andq %r10, %rdx
  30391. andq %r10, %rax
  30392. movq %rdx, 512(%rdi)
  30393. movq %rax, 520(%rdi)
  30394. movq 16(%r8), %rdx
  30395. movq 24(%r8), %rax
  30396. andq %r10, %rdx
  30397. andq %r10, %rax
  30398. movq %rdx, 528(%rdi)
  30399. movq %rax, 536(%rdi)
  30400. movq 32(%r8), %rdx
  30401. movq 40(%r8), %rax
  30402. andq %r10, %rdx
  30403. andq %r10, %rax
  30404. movq %rdx, 544(%rdi)
  30405. movq %rax, 552(%rdi)
  30406. movq 48(%r8), %rdx
  30407. movq 56(%r8), %rax
  30408. andq %r10, %rdx
  30409. andq %r10, %rax
  30410. movq %rdx, 560(%rdi)
  30411. movq %rax, 568(%rdi)
  30412. movq 64(%r8), %rdx
  30413. movq 72(%r8), %rax
  30414. andq %r10, %rdx
  30415. andq %r10, %rax
  30416. movq %rdx, 576(%rdi)
  30417. movq %rax, 584(%rdi)
  30418. movq 80(%r8), %rdx
  30419. movq 88(%r8), %rax
  30420. andq %r10, %rdx
  30421. andq %r10, %rax
  30422. movq %rdx, 592(%rdi)
  30423. movq %rax, 600(%rdi)
  30424. movq 96(%r8), %rdx
  30425. movq 104(%r8), %rax
  30426. andq %r10, %rdx
  30427. andq %r10, %rax
  30428. movq %rdx, 608(%rdi)
  30429. movq %rax, 616(%rdi)
  30430. movq 112(%r8), %rdx
  30431. movq 120(%r8), %rax
  30432. andq %r10, %rdx
  30433. andq %r10, %rax
  30434. movq %rdx, 624(%rdi)
  30435. movq %rax, 632(%rdi)
  30436. movq 128(%r8), %rdx
  30437. movq 136(%r8), %rax
  30438. andq %r10, %rdx
  30439. andq %r10, %rax
  30440. movq %rdx, 640(%rdi)
  30441. movq %rax, 648(%rdi)
  30442. movq 144(%r8), %rdx
  30443. movq 152(%r8), %rax
  30444. andq %r10, %rdx
  30445. andq %r10, %rax
  30446. movq %rdx, 656(%rdi)
  30447. movq %rax, 664(%rdi)
  30448. movq 160(%r8), %rdx
  30449. movq 168(%r8), %rax
  30450. andq %r10, %rdx
  30451. andq %r10, %rax
  30452. movq %rdx, 672(%rdi)
  30453. movq %rax, 680(%rdi)
  30454. movq 176(%r8), %rdx
  30455. movq 184(%r8), %rax
  30456. andq %r10, %rdx
  30457. andq %r10, %rax
  30458. movq %rdx, 688(%rdi)
  30459. movq %rax, 696(%rdi)
  30460. movq 192(%r8), %rdx
  30461. movq 200(%r8), %rax
  30462. andq %r10, %rdx
  30463. andq %r10, %rax
  30464. movq %rdx, 704(%rdi)
  30465. movq %rax, 712(%rdi)
  30466. movq 208(%r8), %rdx
  30467. movq 216(%r8), %rax
  30468. andq %r10, %rdx
  30469. andq %r10, %rax
  30470. movq %rdx, 720(%rdi)
  30471. movq %rax, 728(%rdi)
  30472. movq 224(%r8), %rdx
  30473. movq 232(%r8), %rax
  30474. andq %r10, %rdx
  30475. andq %r10, %rax
  30476. movq %rdx, 736(%rdi)
  30477. movq %rax, 744(%rdi)
  30478. movq 240(%r8), %rdx
  30479. movq 248(%r8), %rax
  30480. andq %r10, %rdx
  30481. andq %r10, %rax
  30482. movq %rdx, 752(%rdi)
  30483. movq %rax, 760(%rdi)
  30484. movq 512(%rdi), %rdx
  30485. addq %rdx, %rdx
  30486. movq 520(%rdi), %rax
  30487. movq %rdx, 512(%rdi)
  30488. adcq %rax, %rax
  30489. movq 528(%rdi), %rdx
  30490. movq %rax, 520(%rdi)
  30491. adcq %rdx, %rdx
  30492. movq 536(%rdi), %rax
  30493. movq %rdx, 528(%rdi)
  30494. adcq %rax, %rax
  30495. movq 544(%rdi), %rdx
  30496. movq %rax, 536(%rdi)
  30497. adcq %rdx, %rdx
  30498. movq 552(%rdi), %rax
  30499. movq %rdx, 544(%rdi)
  30500. adcq %rax, %rax
  30501. movq 560(%rdi), %rdx
  30502. movq %rax, 552(%rdi)
  30503. adcq %rdx, %rdx
  30504. movq 568(%rdi), %rax
  30505. movq %rdx, 560(%rdi)
  30506. adcq %rax, %rax
  30507. movq 576(%rdi), %rdx
  30508. movq %rax, 568(%rdi)
  30509. adcq %rdx, %rdx
  30510. movq 584(%rdi), %rax
  30511. movq %rdx, 576(%rdi)
  30512. adcq %rax, %rax
  30513. movq 592(%rdi), %rdx
  30514. movq %rax, 584(%rdi)
  30515. adcq %rdx, %rdx
  30516. movq 600(%rdi), %rax
  30517. movq %rdx, 592(%rdi)
  30518. adcq %rax, %rax
  30519. movq 608(%rdi), %rdx
  30520. movq %rax, 600(%rdi)
  30521. adcq %rdx, %rdx
  30522. movq 616(%rdi), %rax
  30523. movq %rdx, 608(%rdi)
  30524. adcq %rax, %rax
  30525. movq 624(%rdi), %rdx
  30526. movq %rax, 616(%rdi)
  30527. adcq %rdx, %rdx
  30528. movq 632(%rdi), %rax
  30529. movq %rdx, 624(%rdi)
  30530. adcq %rax, %rax
  30531. movq 640(%rdi), %rdx
  30532. movq %rax, 632(%rdi)
  30533. adcq %rdx, %rdx
  30534. movq 648(%rdi), %rax
  30535. movq %rdx, 640(%rdi)
  30536. adcq %rax, %rax
  30537. movq 656(%rdi), %rdx
  30538. movq %rax, 648(%rdi)
  30539. adcq %rdx, %rdx
  30540. movq 664(%rdi), %rax
  30541. movq %rdx, 656(%rdi)
  30542. adcq %rax, %rax
  30543. movq 672(%rdi), %rdx
  30544. movq %rax, 664(%rdi)
  30545. adcq %rdx, %rdx
  30546. movq 680(%rdi), %rax
  30547. movq %rdx, 672(%rdi)
  30548. adcq %rax, %rax
  30549. movq 688(%rdi), %rdx
  30550. movq %rax, 680(%rdi)
  30551. adcq %rdx, %rdx
  30552. movq 696(%rdi), %rax
  30553. movq %rdx, 688(%rdi)
  30554. adcq %rax, %rax
  30555. movq 704(%rdi), %rdx
  30556. movq %rax, 696(%rdi)
  30557. adcq %rdx, %rdx
  30558. movq 712(%rdi), %rax
  30559. movq %rdx, 704(%rdi)
  30560. adcq %rax, %rax
  30561. movq 720(%rdi), %rdx
  30562. movq %rax, 712(%rdi)
  30563. adcq %rdx, %rdx
  30564. movq 728(%rdi), %rax
  30565. movq %rdx, 720(%rdi)
  30566. adcq %rax, %rax
  30567. movq 736(%rdi), %rdx
  30568. movq %rax, 728(%rdi)
  30569. adcq %rdx, %rdx
  30570. movq 744(%rdi), %rax
  30571. movq %rdx, 736(%rdi)
  30572. adcq %rax, %rax
  30573. movq 752(%rdi), %rdx
  30574. movq %rax, 744(%rdi)
  30575. adcq %rdx, %rdx
  30576. movq 760(%rdi), %rax
  30577. movq %rdx, 752(%rdi)
  30578. adcq %rax, %rax
  30579. movq %rax, 760(%rdi)
  30580. adcq $0x00, %rcx
  30581. leaq 512(%rsp), %rsi
  30582. movq %rsp, %r8
  30583. movq (%r8), %rdx
  30584. subq (%rsi), %rdx
  30585. movq 8(%r8), %rax
  30586. movq %rdx, (%r8)
  30587. sbbq 8(%rsi), %rax
  30588. movq 16(%r8), %rdx
  30589. movq %rax, 8(%r8)
  30590. sbbq 16(%rsi), %rdx
  30591. movq 24(%r8), %rax
  30592. movq %rdx, 16(%r8)
  30593. sbbq 24(%rsi), %rax
  30594. movq 32(%r8), %rdx
  30595. movq %rax, 24(%r8)
  30596. sbbq 32(%rsi), %rdx
  30597. movq 40(%r8), %rax
  30598. movq %rdx, 32(%r8)
  30599. sbbq 40(%rsi), %rax
  30600. movq 48(%r8), %rdx
  30601. movq %rax, 40(%r8)
  30602. sbbq 48(%rsi), %rdx
  30603. movq 56(%r8), %rax
  30604. movq %rdx, 48(%r8)
  30605. sbbq 56(%rsi), %rax
  30606. movq 64(%r8), %rdx
  30607. movq %rax, 56(%r8)
  30608. sbbq 64(%rsi), %rdx
  30609. movq 72(%r8), %rax
  30610. movq %rdx, 64(%r8)
  30611. sbbq 72(%rsi), %rax
  30612. movq 80(%r8), %rdx
  30613. movq %rax, 72(%r8)
  30614. sbbq 80(%rsi), %rdx
  30615. movq 88(%r8), %rax
  30616. movq %rdx, 80(%r8)
  30617. sbbq 88(%rsi), %rax
  30618. movq 96(%r8), %rdx
  30619. movq %rax, 88(%r8)
  30620. sbbq 96(%rsi), %rdx
  30621. movq 104(%r8), %rax
  30622. movq %rdx, 96(%r8)
  30623. sbbq 104(%rsi), %rax
  30624. movq 112(%r8), %rdx
  30625. movq %rax, 104(%r8)
  30626. sbbq 112(%rsi), %rdx
  30627. movq 120(%r8), %rax
  30628. movq %rdx, 112(%r8)
  30629. sbbq 120(%rsi), %rax
  30630. movq 128(%r8), %rdx
  30631. movq %rax, 120(%r8)
  30632. sbbq 128(%rsi), %rdx
  30633. movq 136(%r8), %rax
  30634. movq %rdx, 128(%r8)
  30635. sbbq 136(%rsi), %rax
  30636. movq 144(%r8), %rdx
  30637. movq %rax, 136(%r8)
  30638. sbbq 144(%rsi), %rdx
  30639. movq 152(%r8), %rax
  30640. movq %rdx, 144(%r8)
  30641. sbbq 152(%rsi), %rax
  30642. movq 160(%r8), %rdx
  30643. movq %rax, 152(%r8)
  30644. sbbq 160(%rsi), %rdx
  30645. movq 168(%r8), %rax
  30646. movq %rdx, 160(%r8)
  30647. sbbq 168(%rsi), %rax
  30648. movq 176(%r8), %rdx
  30649. movq %rax, 168(%r8)
  30650. sbbq 176(%rsi), %rdx
  30651. movq 184(%r8), %rax
  30652. movq %rdx, 176(%r8)
  30653. sbbq 184(%rsi), %rax
  30654. movq 192(%r8), %rdx
  30655. movq %rax, 184(%r8)
  30656. sbbq 192(%rsi), %rdx
  30657. movq 200(%r8), %rax
  30658. movq %rdx, 192(%r8)
  30659. sbbq 200(%rsi), %rax
  30660. movq 208(%r8), %rdx
  30661. movq %rax, 200(%r8)
  30662. sbbq 208(%rsi), %rdx
  30663. movq 216(%r8), %rax
  30664. movq %rdx, 208(%r8)
  30665. sbbq 216(%rsi), %rax
  30666. movq 224(%r8), %rdx
  30667. movq %rax, 216(%r8)
  30668. sbbq 224(%rsi), %rdx
  30669. movq 232(%r8), %rax
  30670. movq %rdx, 224(%r8)
  30671. sbbq 232(%rsi), %rax
  30672. movq 240(%r8), %rdx
  30673. movq %rax, 232(%r8)
  30674. sbbq 240(%rsi), %rdx
  30675. movq 248(%r8), %rax
  30676. movq %rdx, 240(%r8)
  30677. sbbq 248(%rsi), %rax
  30678. movq 256(%r8), %rdx
  30679. movq %rax, 248(%r8)
  30680. sbbq 256(%rsi), %rdx
  30681. movq 264(%r8), %rax
  30682. movq %rdx, 256(%r8)
  30683. sbbq 264(%rsi), %rax
  30684. movq 272(%r8), %rdx
  30685. movq %rax, 264(%r8)
  30686. sbbq 272(%rsi), %rdx
  30687. movq 280(%r8), %rax
  30688. movq %rdx, 272(%r8)
  30689. sbbq 280(%rsi), %rax
  30690. movq 288(%r8), %rdx
  30691. movq %rax, 280(%r8)
  30692. sbbq 288(%rsi), %rdx
  30693. movq 296(%r8), %rax
  30694. movq %rdx, 288(%r8)
  30695. sbbq 296(%rsi), %rax
  30696. movq 304(%r8), %rdx
  30697. movq %rax, 296(%r8)
  30698. sbbq 304(%rsi), %rdx
  30699. movq 312(%r8), %rax
  30700. movq %rdx, 304(%r8)
  30701. sbbq 312(%rsi), %rax
  30702. movq 320(%r8), %rdx
  30703. movq %rax, 312(%r8)
  30704. sbbq 320(%rsi), %rdx
  30705. movq 328(%r8), %rax
  30706. movq %rdx, 320(%r8)
  30707. sbbq 328(%rsi), %rax
  30708. movq 336(%r8), %rdx
  30709. movq %rax, 328(%r8)
  30710. sbbq 336(%rsi), %rdx
  30711. movq 344(%r8), %rax
  30712. movq %rdx, 336(%r8)
  30713. sbbq 344(%rsi), %rax
  30714. movq 352(%r8), %rdx
  30715. movq %rax, 344(%r8)
  30716. sbbq 352(%rsi), %rdx
  30717. movq 360(%r8), %rax
  30718. movq %rdx, 352(%r8)
  30719. sbbq 360(%rsi), %rax
  30720. movq 368(%r8), %rdx
  30721. movq %rax, 360(%r8)
  30722. sbbq 368(%rsi), %rdx
  30723. movq 376(%r8), %rax
  30724. movq %rdx, 368(%r8)
  30725. sbbq 376(%rsi), %rax
  30726. movq 384(%r8), %rdx
  30727. movq %rax, 376(%r8)
  30728. sbbq 384(%rsi), %rdx
  30729. movq 392(%r8), %rax
  30730. movq %rdx, 384(%r8)
  30731. sbbq 392(%rsi), %rax
  30732. movq 400(%r8), %rdx
  30733. movq %rax, 392(%r8)
  30734. sbbq 400(%rsi), %rdx
  30735. movq 408(%r8), %rax
  30736. movq %rdx, 400(%r8)
  30737. sbbq 408(%rsi), %rax
  30738. movq 416(%r8), %rdx
  30739. movq %rax, 408(%r8)
  30740. sbbq 416(%rsi), %rdx
  30741. movq 424(%r8), %rax
  30742. movq %rdx, 416(%r8)
  30743. sbbq 424(%rsi), %rax
  30744. movq 432(%r8), %rdx
  30745. movq %rax, 424(%r8)
  30746. sbbq 432(%rsi), %rdx
  30747. movq 440(%r8), %rax
  30748. movq %rdx, 432(%r8)
  30749. sbbq 440(%rsi), %rax
  30750. movq 448(%r8), %rdx
  30751. movq %rax, 440(%r8)
  30752. sbbq 448(%rsi), %rdx
  30753. movq 456(%r8), %rax
  30754. movq %rdx, 448(%r8)
  30755. sbbq 456(%rsi), %rax
  30756. movq 464(%r8), %rdx
  30757. movq %rax, 456(%r8)
  30758. sbbq 464(%rsi), %rdx
  30759. movq 472(%r8), %rax
  30760. movq %rdx, 464(%r8)
  30761. sbbq 472(%rsi), %rax
  30762. movq 480(%r8), %rdx
  30763. movq %rax, 472(%r8)
  30764. sbbq 480(%rsi), %rdx
  30765. movq 488(%r8), %rax
  30766. movq %rdx, 480(%r8)
  30767. sbbq 488(%rsi), %rax
  30768. movq 496(%r8), %rdx
  30769. movq %rax, 488(%r8)
  30770. sbbq 496(%rsi), %rdx
  30771. movq 504(%r8), %rax
  30772. movq %rdx, 496(%r8)
  30773. sbbq 504(%rsi), %rax
  30774. movq %rax, 504(%r8)
  30775. sbbq $0x00, %rcx
  30776. movq (%r8), %rdx
  30777. subq (%rdi), %rdx
  30778. movq 8(%r8), %rax
  30779. movq %rdx, (%r8)
  30780. sbbq 8(%rdi), %rax
  30781. movq 16(%r8), %rdx
  30782. movq %rax, 8(%r8)
  30783. sbbq 16(%rdi), %rdx
  30784. movq 24(%r8), %rax
  30785. movq %rdx, 16(%r8)
  30786. sbbq 24(%rdi), %rax
  30787. movq 32(%r8), %rdx
  30788. movq %rax, 24(%r8)
  30789. sbbq 32(%rdi), %rdx
  30790. movq 40(%r8), %rax
  30791. movq %rdx, 32(%r8)
  30792. sbbq 40(%rdi), %rax
  30793. movq 48(%r8), %rdx
  30794. movq %rax, 40(%r8)
  30795. sbbq 48(%rdi), %rdx
  30796. movq 56(%r8), %rax
  30797. movq %rdx, 48(%r8)
  30798. sbbq 56(%rdi), %rax
  30799. movq 64(%r8), %rdx
  30800. movq %rax, 56(%r8)
  30801. sbbq 64(%rdi), %rdx
  30802. movq 72(%r8), %rax
  30803. movq %rdx, 64(%r8)
  30804. sbbq 72(%rdi), %rax
  30805. movq 80(%r8), %rdx
  30806. movq %rax, 72(%r8)
  30807. sbbq 80(%rdi), %rdx
  30808. movq 88(%r8), %rax
  30809. movq %rdx, 80(%r8)
  30810. sbbq 88(%rdi), %rax
  30811. movq 96(%r8), %rdx
  30812. movq %rax, 88(%r8)
  30813. sbbq 96(%rdi), %rdx
  30814. movq 104(%r8), %rax
  30815. movq %rdx, 96(%r8)
  30816. sbbq 104(%rdi), %rax
  30817. movq 112(%r8), %rdx
  30818. movq %rax, 104(%r8)
  30819. sbbq 112(%rdi), %rdx
  30820. movq 120(%r8), %rax
  30821. movq %rdx, 112(%r8)
  30822. sbbq 120(%rdi), %rax
  30823. movq 128(%r8), %rdx
  30824. movq %rax, 120(%r8)
  30825. sbbq 128(%rdi), %rdx
  30826. movq 136(%r8), %rax
  30827. movq %rdx, 128(%r8)
  30828. sbbq 136(%rdi), %rax
  30829. movq 144(%r8), %rdx
  30830. movq %rax, 136(%r8)
  30831. sbbq 144(%rdi), %rdx
  30832. movq 152(%r8), %rax
  30833. movq %rdx, 144(%r8)
  30834. sbbq 152(%rdi), %rax
  30835. movq 160(%r8), %rdx
  30836. movq %rax, 152(%r8)
  30837. sbbq 160(%rdi), %rdx
  30838. movq 168(%r8), %rax
  30839. movq %rdx, 160(%r8)
  30840. sbbq 168(%rdi), %rax
  30841. movq 176(%r8), %rdx
  30842. movq %rax, 168(%r8)
  30843. sbbq 176(%rdi), %rdx
  30844. movq 184(%r8), %rax
  30845. movq %rdx, 176(%r8)
  30846. sbbq 184(%rdi), %rax
  30847. movq 192(%r8), %rdx
  30848. movq %rax, 184(%r8)
  30849. sbbq 192(%rdi), %rdx
  30850. movq 200(%r8), %rax
  30851. movq %rdx, 192(%r8)
  30852. sbbq 200(%rdi), %rax
  30853. movq 208(%r8), %rdx
  30854. movq %rax, 200(%r8)
  30855. sbbq 208(%rdi), %rdx
  30856. movq 216(%r8), %rax
  30857. movq %rdx, 208(%r8)
  30858. sbbq 216(%rdi), %rax
  30859. movq 224(%r8), %rdx
  30860. movq %rax, 216(%r8)
  30861. sbbq 224(%rdi), %rdx
  30862. movq 232(%r8), %rax
  30863. movq %rdx, 224(%r8)
  30864. sbbq 232(%rdi), %rax
  30865. movq 240(%r8), %rdx
  30866. movq %rax, 232(%r8)
  30867. sbbq 240(%rdi), %rdx
  30868. movq 248(%r8), %rax
  30869. movq %rdx, 240(%r8)
  30870. sbbq 248(%rdi), %rax
  30871. movq 256(%r8), %rdx
  30872. movq %rax, 248(%r8)
  30873. sbbq 256(%rdi), %rdx
  30874. movq 264(%r8), %rax
  30875. movq %rdx, 256(%r8)
  30876. sbbq 264(%rdi), %rax
  30877. movq 272(%r8), %rdx
  30878. movq %rax, 264(%r8)
  30879. sbbq 272(%rdi), %rdx
  30880. movq 280(%r8), %rax
  30881. movq %rdx, 272(%r8)
  30882. sbbq 280(%rdi), %rax
  30883. movq 288(%r8), %rdx
  30884. movq %rax, 280(%r8)
  30885. sbbq 288(%rdi), %rdx
  30886. movq 296(%r8), %rax
  30887. movq %rdx, 288(%r8)
  30888. sbbq 296(%rdi), %rax
  30889. movq 304(%r8), %rdx
  30890. movq %rax, 296(%r8)
  30891. sbbq 304(%rdi), %rdx
  30892. movq 312(%r8), %rax
  30893. movq %rdx, 304(%r8)
  30894. sbbq 312(%rdi), %rax
  30895. movq 320(%r8), %rdx
  30896. movq %rax, 312(%r8)
  30897. sbbq 320(%rdi), %rdx
  30898. movq 328(%r8), %rax
  30899. movq %rdx, 320(%r8)
  30900. sbbq 328(%rdi), %rax
  30901. movq 336(%r8), %rdx
  30902. movq %rax, 328(%r8)
  30903. sbbq 336(%rdi), %rdx
  30904. movq 344(%r8), %rax
  30905. movq %rdx, 336(%r8)
  30906. sbbq 344(%rdi), %rax
  30907. movq 352(%r8), %rdx
  30908. movq %rax, 344(%r8)
  30909. sbbq 352(%rdi), %rdx
  30910. movq 360(%r8), %rax
  30911. movq %rdx, 352(%r8)
  30912. sbbq 360(%rdi), %rax
  30913. movq 368(%r8), %rdx
  30914. movq %rax, 360(%r8)
  30915. sbbq 368(%rdi), %rdx
  30916. movq 376(%r8), %rax
  30917. movq %rdx, 368(%r8)
  30918. sbbq 376(%rdi), %rax
  30919. movq 384(%r8), %rdx
  30920. movq %rax, 376(%r8)
  30921. sbbq 384(%rdi), %rdx
  30922. movq 392(%r8), %rax
  30923. movq %rdx, 384(%r8)
  30924. sbbq 392(%rdi), %rax
  30925. movq 400(%r8), %rdx
  30926. movq %rax, 392(%r8)
  30927. sbbq 400(%rdi), %rdx
  30928. movq 408(%r8), %rax
  30929. movq %rdx, 400(%r8)
  30930. sbbq 408(%rdi), %rax
  30931. movq 416(%r8), %rdx
  30932. movq %rax, 408(%r8)
  30933. sbbq 416(%rdi), %rdx
  30934. movq 424(%r8), %rax
  30935. movq %rdx, 416(%r8)
  30936. sbbq 424(%rdi), %rax
  30937. movq 432(%r8), %rdx
  30938. movq %rax, 424(%r8)
  30939. sbbq 432(%rdi), %rdx
  30940. movq 440(%r8), %rax
  30941. movq %rdx, 432(%r8)
  30942. sbbq 440(%rdi), %rax
  30943. movq 448(%r8), %rdx
  30944. movq %rax, 440(%r8)
  30945. sbbq 448(%rdi), %rdx
  30946. movq 456(%r8), %rax
  30947. movq %rdx, 448(%r8)
  30948. sbbq 456(%rdi), %rax
  30949. movq 464(%r8), %rdx
  30950. movq %rax, 456(%r8)
  30951. sbbq 464(%rdi), %rdx
  30952. movq 472(%r8), %rax
  30953. movq %rdx, 464(%r8)
  30954. sbbq 472(%rdi), %rax
  30955. movq 480(%r8), %rdx
  30956. movq %rax, 472(%r8)
  30957. sbbq 480(%rdi), %rdx
  30958. movq 488(%r8), %rax
  30959. movq %rdx, 480(%r8)
  30960. sbbq 488(%rdi), %rax
  30961. movq 496(%r8), %rdx
  30962. movq %rax, 488(%r8)
  30963. sbbq 496(%rdi), %rdx
  30964. movq 504(%r8), %rax
  30965. movq %rdx, 496(%r8)
  30966. sbbq 504(%rdi), %rax
  30967. movq %rax, 504(%r8)
  30968. sbbq $0x00, %rcx
  30969. # Add in place
  30970. movq 256(%rdi), %rdx
  30971. addq (%r8), %rdx
  30972. movq 264(%rdi), %rax
  30973. movq %rdx, 256(%rdi)
  30974. adcq 8(%r8), %rax
  30975. movq 272(%rdi), %rdx
  30976. movq %rax, 264(%rdi)
  30977. adcq 16(%r8), %rdx
  30978. movq 280(%rdi), %rax
  30979. movq %rdx, 272(%rdi)
  30980. adcq 24(%r8), %rax
  30981. movq 288(%rdi), %rdx
  30982. movq %rax, 280(%rdi)
  30983. adcq 32(%r8), %rdx
  30984. movq 296(%rdi), %rax
  30985. movq %rdx, 288(%rdi)
  30986. adcq 40(%r8), %rax
  30987. movq 304(%rdi), %rdx
  30988. movq %rax, 296(%rdi)
  30989. adcq 48(%r8), %rdx
  30990. movq 312(%rdi), %rax
  30991. movq %rdx, 304(%rdi)
  30992. adcq 56(%r8), %rax
  30993. movq 320(%rdi), %rdx
  30994. movq %rax, 312(%rdi)
  30995. adcq 64(%r8), %rdx
  30996. movq 328(%rdi), %rax
  30997. movq %rdx, 320(%rdi)
  30998. adcq 72(%r8), %rax
  30999. movq 336(%rdi), %rdx
  31000. movq %rax, 328(%rdi)
  31001. adcq 80(%r8), %rdx
  31002. movq 344(%rdi), %rax
  31003. movq %rdx, 336(%rdi)
  31004. adcq 88(%r8), %rax
  31005. movq 352(%rdi), %rdx
  31006. movq %rax, 344(%rdi)
  31007. adcq 96(%r8), %rdx
  31008. movq 360(%rdi), %rax
  31009. movq %rdx, 352(%rdi)
  31010. adcq 104(%r8), %rax
  31011. movq 368(%rdi), %rdx
  31012. movq %rax, 360(%rdi)
  31013. adcq 112(%r8), %rdx
  31014. movq 376(%rdi), %rax
  31015. movq %rdx, 368(%rdi)
  31016. adcq 120(%r8), %rax
  31017. movq 384(%rdi), %rdx
  31018. movq %rax, 376(%rdi)
  31019. adcq 128(%r8), %rdx
  31020. movq 392(%rdi), %rax
  31021. movq %rdx, 384(%rdi)
  31022. adcq 136(%r8), %rax
  31023. movq 400(%rdi), %rdx
  31024. movq %rax, 392(%rdi)
  31025. adcq 144(%r8), %rdx
  31026. movq 408(%rdi), %rax
  31027. movq %rdx, 400(%rdi)
  31028. adcq 152(%r8), %rax
  31029. movq 416(%rdi), %rdx
  31030. movq %rax, 408(%rdi)
  31031. adcq 160(%r8), %rdx
  31032. movq 424(%rdi), %rax
  31033. movq %rdx, 416(%rdi)
  31034. adcq 168(%r8), %rax
  31035. movq 432(%rdi), %rdx
  31036. movq %rax, 424(%rdi)
  31037. adcq 176(%r8), %rdx
  31038. movq 440(%rdi), %rax
  31039. movq %rdx, 432(%rdi)
  31040. adcq 184(%r8), %rax
  31041. movq 448(%rdi), %rdx
  31042. movq %rax, 440(%rdi)
  31043. adcq 192(%r8), %rdx
  31044. movq 456(%rdi), %rax
  31045. movq %rdx, 448(%rdi)
  31046. adcq 200(%r8), %rax
  31047. movq 464(%rdi), %rdx
  31048. movq %rax, 456(%rdi)
  31049. adcq 208(%r8), %rdx
  31050. movq 472(%rdi), %rax
  31051. movq %rdx, 464(%rdi)
  31052. adcq 216(%r8), %rax
  31053. movq 480(%rdi), %rdx
  31054. movq %rax, 472(%rdi)
  31055. adcq 224(%r8), %rdx
  31056. movq 488(%rdi), %rax
  31057. movq %rdx, 480(%rdi)
  31058. adcq 232(%r8), %rax
  31059. movq 496(%rdi), %rdx
  31060. movq %rax, 488(%rdi)
  31061. adcq 240(%r8), %rdx
  31062. movq 504(%rdi), %rax
  31063. movq %rdx, 496(%rdi)
  31064. adcq 248(%r8), %rax
  31065. movq 512(%rdi), %rdx
  31066. movq %rax, 504(%rdi)
  31067. adcq 256(%r8), %rdx
  31068. movq 520(%rdi), %rax
  31069. movq %rdx, 512(%rdi)
  31070. adcq 264(%r8), %rax
  31071. movq 528(%rdi), %rdx
  31072. movq %rax, 520(%rdi)
  31073. adcq 272(%r8), %rdx
  31074. movq 536(%rdi), %rax
  31075. movq %rdx, 528(%rdi)
  31076. adcq 280(%r8), %rax
  31077. movq 544(%rdi), %rdx
  31078. movq %rax, 536(%rdi)
  31079. adcq 288(%r8), %rdx
  31080. movq 552(%rdi), %rax
  31081. movq %rdx, 544(%rdi)
  31082. adcq 296(%r8), %rax
  31083. movq 560(%rdi), %rdx
  31084. movq %rax, 552(%rdi)
  31085. adcq 304(%r8), %rdx
  31086. movq 568(%rdi), %rax
  31087. movq %rdx, 560(%rdi)
  31088. adcq 312(%r8), %rax
  31089. movq 576(%rdi), %rdx
  31090. movq %rax, 568(%rdi)
  31091. adcq 320(%r8), %rdx
  31092. movq 584(%rdi), %rax
  31093. movq %rdx, 576(%rdi)
  31094. adcq 328(%r8), %rax
  31095. movq 592(%rdi), %rdx
  31096. movq %rax, 584(%rdi)
  31097. adcq 336(%r8), %rdx
  31098. movq 600(%rdi), %rax
  31099. movq %rdx, 592(%rdi)
  31100. adcq 344(%r8), %rax
  31101. movq 608(%rdi), %rdx
  31102. movq %rax, 600(%rdi)
  31103. adcq 352(%r8), %rdx
  31104. movq 616(%rdi), %rax
  31105. movq %rdx, 608(%rdi)
  31106. adcq 360(%r8), %rax
  31107. movq 624(%rdi), %rdx
  31108. movq %rax, 616(%rdi)
  31109. adcq 368(%r8), %rdx
  31110. movq 632(%rdi), %rax
  31111. movq %rdx, 624(%rdi)
  31112. adcq 376(%r8), %rax
  31113. movq 640(%rdi), %rdx
  31114. movq %rax, 632(%rdi)
  31115. adcq 384(%r8), %rdx
  31116. movq 648(%rdi), %rax
  31117. movq %rdx, 640(%rdi)
  31118. adcq 392(%r8), %rax
  31119. movq 656(%rdi), %rdx
  31120. movq %rax, 648(%rdi)
  31121. adcq 400(%r8), %rdx
  31122. movq 664(%rdi), %rax
  31123. movq %rdx, 656(%rdi)
  31124. adcq 408(%r8), %rax
  31125. movq 672(%rdi), %rdx
  31126. movq %rax, 664(%rdi)
  31127. adcq 416(%r8), %rdx
  31128. movq 680(%rdi), %rax
  31129. movq %rdx, 672(%rdi)
  31130. adcq 424(%r8), %rax
  31131. movq 688(%rdi), %rdx
  31132. movq %rax, 680(%rdi)
  31133. adcq 432(%r8), %rdx
  31134. movq 696(%rdi), %rax
  31135. movq %rdx, 688(%rdi)
  31136. adcq 440(%r8), %rax
  31137. movq 704(%rdi), %rdx
  31138. movq %rax, 696(%rdi)
  31139. adcq 448(%r8), %rdx
  31140. movq 712(%rdi), %rax
  31141. movq %rdx, 704(%rdi)
  31142. adcq 456(%r8), %rax
  31143. movq 720(%rdi), %rdx
  31144. movq %rax, 712(%rdi)
  31145. adcq 464(%r8), %rdx
  31146. movq 728(%rdi), %rax
  31147. movq %rdx, 720(%rdi)
  31148. adcq 472(%r8), %rax
  31149. movq 736(%rdi), %rdx
  31150. movq %rax, 728(%rdi)
  31151. adcq 480(%r8), %rdx
  31152. movq 744(%rdi), %rax
  31153. movq %rdx, 736(%rdi)
  31154. adcq 488(%r8), %rax
  31155. movq 752(%rdi), %rdx
  31156. movq %rax, 744(%rdi)
  31157. adcq 496(%r8), %rdx
  31158. movq 760(%rdi), %rax
  31159. movq %rdx, 752(%rdi)
  31160. adcq 504(%r8), %rax
  31161. movq %rax, 760(%rdi)
  31162. adcq $0x00, %rcx
  31163. movq %rcx, 768(%rdi)
  31164. # Add in place
  31165. movq 512(%rdi), %rdx
  31166. xorq %rcx, %rcx
  31167. addq (%rsi), %rdx
  31168. movq 520(%rdi), %rax
  31169. movq %rdx, 512(%rdi)
  31170. adcq 8(%rsi), %rax
  31171. movq 528(%rdi), %rdx
  31172. movq %rax, 520(%rdi)
  31173. adcq 16(%rsi), %rdx
  31174. movq 536(%rdi), %rax
  31175. movq %rdx, 528(%rdi)
  31176. adcq 24(%rsi), %rax
  31177. movq 544(%rdi), %rdx
  31178. movq %rax, 536(%rdi)
  31179. adcq 32(%rsi), %rdx
  31180. movq 552(%rdi), %rax
  31181. movq %rdx, 544(%rdi)
  31182. adcq 40(%rsi), %rax
  31183. movq 560(%rdi), %rdx
  31184. movq %rax, 552(%rdi)
  31185. adcq 48(%rsi), %rdx
  31186. movq 568(%rdi), %rax
  31187. movq %rdx, 560(%rdi)
  31188. adcq 56(%rsi), %rax
  31189. movq 576(%rdi), %rdx
  31190. movq %rax, 568(%rdi)
  31191. adcq 64(%rsi), %rdx
  31192. movq 584(%rdi), %rax
  31193. movq %rdx, 576(%rdi)
  31194. adcq 72(%rsi), %rax
  31195. movq 592(%rdi), %rdx
  31196. movq %rax, 584(%rdi)
  31197. adcq 80(%rsi), %rdx
  31198. movq 600(%rdi), %rax
  31199. movq %rdx, 592(%rdi)
  31200. adcq 88(%rsi), %rax
  31201. movq 608(%rdi), %rdx
  31202. movq %rax, 600(%rdi)
  31203. adcq 96(%rsi), %rdx
  31204. movq 616(%rdi), %rax
  31205. movq %rdx, 608(%rdi)
  31206. adcq 104(%rsi), %rax
  31207. movq 624(%rdi), %rdx
  31208. movq %rax, 616(%rdi)
  31209. adcq 112(%rsi), %rdx
  31210. movq 632(%rdi), %rax
  31211. movq %rdx, 624(%rdi)
  31212. adcq 120(%rsi), %rax
  31213. movq 640(%rdi), %rdx
  31214. movq %rax, 632(%rdi)
  31215. adcq 128(%rsi), %rdx
  31216. movq 648(%rdi), %rax
  31217. movq %rdx, 640(%rdi)
  31218. adcq 136(%rsi), %rax
  31219. movq 656(%rdi), %rdx
  31220. movq %rax, 648(%rdi)
  31221. adcq 144(%rsi), %rdx
  31222. movq 664(%rdi), %rax
  31223. movq %rdx, 656(%rdi)
  31224. adcq 152(%rsi), %rax
  31225. movq 672(%rdi), %rdx
  31226. movq %rax, 664(%rdi)
  31227. adcq 160(%rsi), %rdx
  31228. movq 680(%rdi), %rax
  31229. movq %rdx, 672(%rdi)
  31230. adcq 168(%rsi), %rax
  31231. movq 688(%rdi), %rdx
  31232. movq %rax, 680(%rdi)
  31233. adcq 176(%rsi), %rdx
  31234. movq 696(%rdi), %rax
  31235. movq %rdx, 688(%rdi)
  31236. adcq 184(%rsi), %rax
  31237. movq 704(%rdi), %rdx
  31238. movq %rax, 696(%rdi)
  31239. adcq 192(%rsi), %rdx
  31240. movq 712(%rdi), %rax
  31241. movq %rdx, 704(%rdi)
  31242. adcq 200(%rsi), %rax
  31243. movq 720(%rdi), %rdx
  31244. movq %rax, 712(%rdi)
  31245. adcq 208(%rsi), %rdx
  31246. movq 728(%rdi), %rax
  31247. movq %rdx, 720(%rdi)
  31248. adcq 216(%rsi), %rax
  31249. movq 736(%rdi), %rdx
  31250. movq %rax, 728(%rdi)
  31251. adcq 224(%rsi), %rdx
  31252. movq 744(%rdi), %rax
  31253. movq %rdx, 736(%rdi)
  31254. adcq 232(%rsi), %rax
  31255. movq 752(%rdi), %rdx
  31256. movq %rax, 744(%rdi)
  31257. adcq 240(%rsi), %rdx
  31258. movq 760(%rdi), %rax
  31259. movq %rdx, 752(%rdi)
  31260. adcq 248(%rsi), %rax
  31261. movq 768(%rdi), %rdx
  31262. movq %rax, 760(%rdi)
  31263. adcq 256(%rsi), %rdx
  31264. movq %rdx, 768(%rdi)
  31265. adcq $0x00, %rcx
  31266. # Add to zero
  31267. movq 264(%rsi), %rdx
  31268. adcq $0x00, %rdx
  31269. movq 272(%rsi), %rax
  31270. movq %rdx, 776(%rdi)
  31271. adcq $0x00, %rax
  31272. movq 280(%rsi), %rdx
  31273. movq %rax, 784(%rdi)
  31274. adcq $0x00, %rdx
  31275. movq 288(%rsi), %rax
  31276. movq %rdx, 792(%rdi)
  31277. adcq $0x00, %rax
  31278. movq 296(%rsi), %rdx
  31279. movq %rax, 800(%rdi)
  31280. adcq $0x00, %rdx
  31281. movq 304(%rsi), %rax
  31282. movq %rdx, 808(%rdi)
  31283. adcq $0x00, %rax
  31284. movq 312(%rsi), %rdx
  31285. movq %rax, 816(%rdi)
  31286. adcq $0x00, %rdx
  31287. movq 320(%rsi), %rax
  31288. movq %rdx, 824(%rdi)
  31289. adcq $0x00, %rax
  31290. movq 328(%rsi), %rdx
  31291. movq %rax, 832(%rdi)
  31292. adcq $0x00, %rdx
  31293. movq 336(%rsi), %rax
  31294. movq %rdx, 840(%rdi)
  31295. adcq $0x00, %rax
  31296. movq 344(%rsi), %rdx
  31297. movq %rax, 848(%rdi)
  31298. adcq $0x00, %rdx
  31299. movq 352(%rsi), %rax
  31300. movq %rdx, 856(%rdi)
  31301. adcq $0x00, %rax
  31302. movq 360(%rsi), %rdx
  31303. movq %rax, 864(%rdi)
  31304. adcq $0x00, %rdx
  31305. movq 368(%rsi), %rax
  31306. movq %rdx, 872(%rdi)
  31307. adcq $0x00, %rax
  31308. movq 376(%rsi), %rdx
  31309. movq %rax, 880(%rdi)
  31310. adcq $0x00, %rdx
  31311. movq 384(%rsi), %rax
  31312. movq %rdx, 888(%rdi)
  31313. adcq $0x00, %rax
  31314. movq 392(%rsi), %rdx
  31315. movq %rax, 896(%rdi)
  31316. adcq $0x00, %rdx
  31317. movq 400(%rsi), %rax
  31318. movq %rdx, 904(%rdi)
  31319. adcq $0x00, %rax
  31320. movq 408(%rsi), %rdx
  31321. movq %rax, 912(%rdi)
  31322. adcq $0x00, %rdx
  31323. movq 416(%rsi), %rax
  31324. movq %rdx, 920(%rdi)
  31325. adcq $0x00, %rax
  31326. movq 424(%rsi), %rdx
  31327. movq %rax, 928(%rdi)
  31328. adcq $0x00, %rdx
  31329. movq 432(%rsi), %rax
  31330. movq %rdx, 936(%rdi)
  31331. adcq $0x00, %rax
  31332. movq 440(%rsi), %rdx
  31333. movq %rax, 944(%rdi)
  31334. adcq $0x00, %rdx
  31335. movq 448(%rsi), %rax
  31336. movq %rdx, 952(%rdi)
  31337. adcq $0x00, %rax
  31338. movq 456(%rsi), %rdx
  31339. movq %rax, 960(%rdi)
  31340. adcq $0x00, %rdx
  31341. movq 464(%rsi), %rax
  31342. movq %rdx, 968(%rdi)
  31343. adcq $0x00, %rax
  31344. movq 472(%rsi), %rdx
  31345. movq %rax, 976(%rdi)
  31346. adcq $0x00, %rdx
  31347. movq 480(%rsi), %rax
  31348. movq %rdx, 984(%rdi)
  31349. adcq $0x00, %rax
  31350. movq 488(%rsi), %rdx
  31351. movq %rax, 992(%rdi)
  31352. adcq $0x00, %rdx
  31353. movq 496(%rsi), %rax
  31354. movq %rdx, 1000(%rdi)
  31355. adcq $0x00, %rax
  31356. movq 504(%rsi), %rdx
  31357. movq %rax, 1008(%rdi)
  31358. adcq $0x00, %rdx
  31359. movq %rdx, 1016(%rdi)
  31360. addq $0x518, %rsp
  31361. repz retq
  31362. #ifndef __APPLE__
  31363. .size sp_4096_sqr_64,.-sp_4096_sqr_64
  31364. #endif /* __APPLE__ */
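  31365. /* Multiply a by b and store the product in r. (r = a * b)
  31366. *
  31367. * r Output: single precision integer that receives the product.
  31368. * a Input: single precision integer, the first operand.
  31369. * b Input: single precision integer, the second operand.
  31370. */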
  31371. #ifndef __APPLE__
  31372. .text
  31373. .globl sp_4096_mul_avx2_64
  31374. .type sp_4096_mul_avx2_64,@function
  31375. .align 16
  31376. sp_4096_mul_avx2_64:
  31377. #else
  31378. .section __TEXT,__text
  31379. .globl _sp_4096_mul_avx2_64
  31380. .p2align 4
  31381. _sp_4096_mul_avx2_64:
  31382. #endif /* __APPLE__ */
  31383. pushq %r12
  31384. pushq %r13
  31385. pushq %r14
  31386. pushq %r15
  31387. subq $0x628, %rsp
  31388. movq %rdi, 1536(%rsp)
  31389. movq %rsi, 1544(%rsp)
  31390. movq %rdx, 1552(%rsp)
  31391. leaq 1024(%rsp), %r10
  31392. leaq 256(%rsi), %r12
  31393. # Add
  31394. movq (%rsi), %rax
  31395. xorq %r13, %r13
  31396. addq (%r12), %rax
  31397. movq 8(%rsi), %rcx
  31398. movq %rax, (%r10)
  31399. adcq 8(%r12), %rcx
  31400. movq 16(%rsi), %r8
  31401. movq %rcx, 8(%r10)
  31402. adcq 16(%r12), %r8
  31403. movq 24(%rsi), %rax
  31404. movq %r8, 16(%r10)
  31405. adcq 24(%r12), %rax
  31406. movq 32(%rsi), %rcx
  31407. movq %rax, 24(%r10)
  31408. adcq 32(%r12), %rcx
  31409. movq 40(%rsi), %r8
  31410. movq %rcx, 32(%r10)
  31411. adcq 40(%r12), %r8
  31412. movq 48(%rsi), %rax
  31413. movq %r8, 40(%r10)
  31414. adcq 48(%r12), %rax
  31415. movq 56(%rsi), %rcx
  31416. movq %rax, 48(%r10)
  31417. adcq 56(%r12), %rcx
  31418. movq 64(%rsi), %r8
  31419. movq %rcx, 56(%r10)
  31420. adcq 64(%r12), %r8
  31421. movq 72(%rsi), %rax
  31422. movq %r8, 64(%r10)
  31423. adcq 72(%r12), %rax
  31424. movq 80(%rsi), %rcx
  31425. movq %rax, 72(%r10)
  31426. adcq 80(%r12), %rcx
  31427. movq 88(%rsi), %r8
  31428. movq %rcx, 80(%r10)
  31429. adcq 88(%r12), %r8
  31430. movq 96(%rsi), %rax
  31431. movq %r8, 88(%r10)
  31432. adcq 96(%r12), %rax
  31433. movq 104(%rsi), %rcx
  31434. movq %rax, 96(%r10)
  31435. adcq 104(%r12), %rcx
  31436. movq 112(%rsi), %r8
  31437. movq %rcx, 104(%r10)
  31438. adcq 112(%r12), %r8
  31439. movq 120(%rsi), %rax
  31440. movq %r8, 112(%r10)
  31441. adcq 120(%r12), %rax
  31442. movq 128(%rsi), %rcx
  31443. movq %rax, 120(%r10)
  31444. adcq 128(%r12), %rcx
  31445. movq 136(%rsi), %r8
  31446. movq %rcx, 128(%r10)
  31447. adcq 136(%r12), %r8
  31448. movq 144(%rsi), %rax
  31449. movq %r8, 136(%r10)
  31450. adcq 144(%r12), %rax
  31451. movq 152(%rsi), %rcx
  31452. movq %rax, 144(%r10)
  31453. adcq 152(%r12), %rcx
  31454. movq 160(%rsi), %r8
  31455. movq %rcx, 152(%r10)
  31456. adcq 160(%r12), %r8
  31457. movq 168(%rsi), %rax
  31458. movq %r8, 160(%r10)
  31459. adcq 168(%r12), %rax
  31460. movq 176(%rsi), %rcx
  31461. movq %rax, 168(%r10)
  31462. adcq 176(%r12), %rcx
  31463. movq 184(%rsi), %r8
  31464. movq %rcx, 176(%r10)
  31465. adcq 184(%r12), %r8
  31466. movq 192(%rsi), %rax
  31467. movq %r8, 184(%r10)
  31468. adcq 192(%r12), %rax
  31469. movq 200(%rsi), %rcx
  31470. movq %rax, 192(%r10)
  31471. adcq 200(%r12), %rcx
  31472. movq 208(%rsi), %r8
  31473. movq %rcx, 200(%r10)
  31474. adcq 208(%r12), %r8
  31475. movq 216(%rsi), %rax
  31476. movq %r8, 208(%r10)
  31477. adcq 216(%r12), %rax
  31478. movq 224(%rsi), %rcx
  31479. movq %rax, 216(%r10)
  31480. adcq 224(%r12), %rcx
  31481. movq 232(%rsi), %r8
  31482. movq %rcx, 224(%r10)
  31483. adcq 232(%r12), %r8
  31484. movq 240(%rsi), %rax
  31485. movq %r8, 232(%r10)
  31486. adcq 240(%r12), %rax
  31487. movq 248(%rsi), %rcx
  31488. movq %rax, 240(%r10)
  31489. adcq 248(%r12), %rcx
  31490. movq %rcx, 248(%r10)
  31491. adcq $0x00, %r13
  31492. movq %r13, 1560(%rsp)
  31493. leaq 1280(%rsp), %r11
  31494. leaq 256(%rdx), %r12
  31495. # Add
  31496. movq (%rdx), %rax
  31497. xorq %r14, %r14
  31498. addq (%r12), %rax
  31499. movq 8(%rdx), %rcx
  31500. movq %rax, (%r11)
  31501. adcq 8(%r12), %rcx
  31502. movq 16(%rdx), %r8
  31503. movq %rcx, 8(%r11)
  31504. adcq 16(%r12), %r8
  31505. movq 24(%rdx), %rax
  31506. movq %r8, 16(%r11)
  31507. adcq 24(%r12), %rax
  31508. movq 32(%rdx), %rcx
  31509. movq %rax, 24(%r11)
  31510. adcq 32(%r12), %rcx
  31511. movq 40(%rdx), %r8
  31512. movq %rcx, 32(%r11)
  31513. adcq 40(%r12), %r8
  31514. movq 48(%rdx), %rax
  31515. movq %r8, 40(%r11)
  31516. adcq 48(%r12), %rax
  31517. movq 56(%rdx), %rcx
  31518. movq %rax, 48(%r11)
  31519. adcq 56(%r12), %rcx
  31520. movq 64(%rdx), %r8
  31521. movq %rcx, 56(%r11)
  31522. adcq 64(%r12), %r8
  31523. movq 72(%rdx), %rax
  31524. movq %r8, 64(%r11)
  31525. adcq 72(%r12), %rax
  31526. movq 80(%rdx), %rcx
  31527. movq %rax, 72(%r11)
  31528. adcq 80(%r12), %rcx
  31529. movq 88(%rdx), %r8
  31530. movq %rcx, 80(%r11)
  31531. adcq 88(%r12), %r8
  31532. movq 96(%rdx), %rax
  31533. movq %r8, 88(%r11)
  31534. adcq 96(%r12), %rax
  31535. movq 104(%rdx), %rcx
  31536. movq %rax, 96(%r11)
  31537. adcq 104(%r12), %rcx
  31538. movq 112(%rdx), %r8
  31539. movq %rcx, 104(%r11)
  31540. adcq 112(%r12), %r8
  31541. movq 120(%rdx), %rax
  31542. movq %r8, 112(%r11)
  31543. adcq 120(%r12), %rax
  31544. movq 128(%rdx), %rcx
  31545. movq %rax, 120(%r11)
  31546. adcq 128(%r12), %rcx
  31547. movq 136(%rdx), %r8
  31548. movq %rcx, 128(%r11)
  31549. adcq 136(%r12), %r8
  31550. movq 144(%rdx), %rax
  31551. movq %r8, 136(%r11)
  31552. adcq 144(%r12), %rax
  31553. movq 152(%rdx), %rcx
  31554. movq %rax, 144(%r11)
  31555. adcq 152(%r12), %rcx
  31556. movq 160(%rdx), %r8
  31557. movq %rcx, 152(%r11)
  31558. adcq 160(%r12), %r8
  31559. movq 168(%rdx), %rax
  31560. movq %r8, 160(%r11)
  31561. adcq 168(%r12), %rax
  31562. movq 176(%rdx), %rcx
  31563. movq %rax, 168(%r11)
  31564. adcq 176(%r12), %rcx
  31565. movq 184(%rdx), %r8
  31566. movq %rcx, 176(%r11)
  31567. adcq 184(%r12), %r8
  31568. movq 192(%rdx), %rax
  31569. movq %r8, 184(%r11)
  31570. adcq 192(%r12), %rax
  31571. movq 200(%rdx), %rcx
  31572. movq %rax, 192(%r11)
  31573. adcq 200(%r12), %rcx
  31574. movq 208(%rdx), %r8
  31575. movq %rcx, 200(%r11)
  31576. adcq 208(%r12), %r8
  31577. movq 216(%rdx), %rax
  31578. movq %r8, 208(%r11)
  31579. adcq 216(%r12), %rax
  31580. movq 224(%rdx), %rcx
  31581. movq %rax, 216(%r11)
  31582. adcq 224(%r12), %rcx
  31583. movq 232(%rdx), %r8
  31584. movq %rcx, 224(%r11)
  31585. adcq 232(%r12), %r8
  31586. movq 240(%rdx), %rax
  31587. movq %r8, 232(%r11)
  31588. adcq 240(%r12), %rax
  31589. movq 248(%rdx), %rcx
  31590. movq %rax, 240(%r11)
  31591. adcq 248(%r12), %rcx
  31592. movq %rcx, 248(%r11)
  31593. adcq $0x00, %r14
  31594. movq %r14, 1568(%rsp)
  31595. movq %r11, %rdx
  31596. movq %r10, %rsi
  31597. movq %rsp, %rdi
  31598. #ifndef __APPLE__
  31599. callq sp_2048_mul_avx2_32@plt
  31600. #else
  31601. callq _sp_2048_mul_avx2_32
  31602. #endif /* __APPLE__ */
  31603. movq 1552(%rsp), %rdx
  31604. movq 1544(%rsp), %rsi
  31605. leaq 512(%rsp), %rdi
  31606. addq $0x100, %rdx
  31607. addq $0x100, %rsi
  31608. #ifndef __APPLE__
  31609. callq sp_2048_mul_avx2_32@plt
  31610. #else
  31611. callq _sp_2048_mul_avx2_32
  31612. #endif /* __APPLE__ */
  31613. movq 1552(%rsp), %rdx
  31614. movq 1544(%rsp), %rsi
  31615. movq 1536(%rsp), %rdi
  31616. #ifndef __APPLE__
  31617. callq sp_2048_mul_avx2_32@plt
  31618. #else
  31619. callq _sp_2048_mul_avx2_32
  31620. #endif /* __APPLE__ */
  31621. movq 1560(%rsp), %r13
  31622. movq 1568(%rsp), %r14
  31623. movq 1536(%rsp), %r15
  31624. movq %r13, %r9
  31625. leaq 1024(%rsp), %r10
  31626. leaq 1280(%rsp), %r11
  31627. andq %r14, %r9
  31628. negq %r13
  31629. negq %r14
  31630. addq $0x200, %r15
  31631. movq (%r10), %rax
  31632. movq (%r11), %rcx
  31633. pextq %r14, %rax, %rax
  31634. pextq %r13, %rcx, %rcx
  31635. addq %rcx, %rax
  31636. movq 8(%r10), %rcx
  31637. movq 8(%r11), %r8
  31638. pextq %r14, %rcx, %rcx
  31639. pextq %r13, %r8, %r8
  31640. movq %rax, (%r15)
  31641. adcq %r8, %rcx
  31642. movq 16(%r10), %r8
  31643. movq 16(%r11), %rax
  31644. pextq %r14, %r8, %r8
  31645. pextq %r13, %rax, %rax
  31646. movq %rcx, 8(%r15)
  31647. adcq %rax, %r8
  31648. movq 24(%r10), %rax
  31649. movq 24(%r11), %rcx
  31650. pextq %r14, %rax, %rax
  31651. pextq %r13, %rcx, %rcx
  31652. movq %r8, 16(%r15)
  31653. adcq %rcx, %rax
  31654. movq 32(%r10), %rcx
  31655. movq 32(%r11), %r8
  31656. pextq %r14, %rcx, %rcx
  31657. pextq %r13, %r8, %r8
  31658. movq %rax, 24(%r15)
  31659. adcq %r8, %rcx
  31660. movq 40(%r10), %r8
  31661. movq 40(%r11), %rax
  31662. pextq %r14, %r8, %r8
  31663. pextq %r13, %rax, %rax
  31664. movq %rcx, 32(%r15)
  31665. adcq %rax, %r8
  31666. movq 48(%r10), %rax
  31667. movq 48(%r11), %rcx
  31668. pextq %r14, %rax, %rax
  31669. pextq %r13, %rcx, %rcx
  31670. movq %r8, 40(%r15)
  31671. adcq %rcx, %rax
  31672. movq 56(%r10), %rcx
  31673. movq 56(%r11), %r8
  31674. pextq %r14, %rcx, %rcx
  31675. pextq %r13, %r8, %r8
  31676. movq %rax, 48(%r15)
  31677. adcq %r8, %rcx
  31678. movq 64(%r10), %r8
  31679. movq 64(%r11), %rax
  31680. pextq %r14, %r8, %r8
  31681. pextq %r13, %rax, %rax
  31682. movq %rcx, 56(%r15)
  31683. adcq %rax, %r8
  31684. movq 72(%r10), %rax
  31685. movq 72(%r11), %rcx
  31686. pextq %r14, %rax, %rax
  31687. pextq %r13, %rcx, %rcx
  31688. movq %r8, 64(%r15)
  31689. adcq %rcx, %rax
  31690. movq 80(%r10), %rcx
  31691. movq 80(%r11), %r8
  31692. pextq %r14, %rcx, %rcx
  31693. pextq %r13, %r8, %r8
  31694. movq %rax, 72(%r15)
  31695. adcq %r8, %rcx
  31696. movq 88(%r10), %r8
  31697. movq 88(%r11), %rax
  31698. pextq %r14, %r8, %r8
  31699. pextq %r13, %rax, %rax
  31700. movq %rcx, 80(%r15)
  31701. adcq %rax, %r8
  31702. movq 96(%r10), %rax
  31703. movq 96(%r11), %rcx
  31704. pextq %r14, %rax, %rax
  31705. pextq %r13, %rcx, %rcx
  31706. movq %r8, 88(%r15)
  31707. adcq %rcx, %rax
  31708. movq 104(%r10), %rcx
  31709. movq 104(%r11), %r8
  31710. pextq %r14, %rcx, %rcx
  31711. pextq %r13, %r8, %r8
  31712. movq %rax, 96(%r15)
  31713. adcq %r8, %rcx
  31714. movq 112(%r10), %r8
  31715. movq 112(%r11), %rax
  31716. pextq %r14, %r8, %r8
  31717. pextq %r13, %rax, %rax
  31718. movq %rcx, 104(%r15)
  31719. adcq %rax, %r8
  31720. movq 120(%r10), %rax
  31721. movq 120(%r11), %rcx
  31722. pextq %r14, %rax, %rax
  31723. pextq %r13, %rcx, %rcx
  31724. movq %r8, 112(%r15)
  31725. adcq %rcx, %rax
  31726. movq 128(%r10), %rcx
  31727. movq 128(%r11), %r8
  31728. pextq %r14, %rcx, %rcx
  31729. pextq %r13, %r8, %r8
  31730. movq %rax, 120(%r15)
  31731. adcq %r8, %rcx
  31732. movq 136(%r10), %r8
  31733. movq 136(%r11), %rax
  31734. pextq %r14, %r8, %r8
  31735. pextq %r13, %rax, %rax
  31736. movq %rcx, 128(%r15)
  31737. adcq %rax, %r8
  31738. movq 144(%r10), %rax
  31739. movq 144(%r11), %rcx
  31740. pextq %r14, %rax, %rax
  31741. pextq %r13, %rcx, %rcx
  31742. movq %r8, 136(%r15)
  31743. adcq %rcx, %rax
  31744. movq 152(%r10), %rcx
  31745. movq 152(%r11), %r8
  31746. pextq %r14, %rcx, %rcx
  31747. pextq %r13, %r8, %r8
  31748. movq %rax, 144(%r15)
  31749. adcq %r8, %rcx
  31750. movq 160(%r10), %r8
  31751. movq 160(%r11), %rax
  31752. pextq %r14, %r8, %r8
  31753. pextq %r13, %rax, %rax
  31754. movq %rcx, 152(%r15)
  31755. adcq %rax, %r8
  31756. movq 168(%r10), %rax
  31757. movq 168(%r11), %rcx
  31758. pextq %r14, %rax, %rax
  31759. pextq %r13, %rcx, %rcx
  31760. movq %r8, 160(%r15)
  31761. adcq %rcx, %rax
  31762. movq 176(%r10), %rcx
  31763. movq 176(%r11), %r8
  31764. pextq %r14, %rcx, %rcx
  31765. pextq %r13, %r8, %r8
  31766. movq %rax, 168(%r15)
  31767. adcq %r8, %rcx
  31768. movq 184(%r10), %r8
  31769. movq 184(%r11), %rax
  31770. pextq %r14, %r8, %r8
  31771. pextq %r13, %rax, %rax
  31772. movq %rcx, 176(%r15)
  31773. adcq %rax, %r8
  31774. movq 192(%r10), %rax
  31775. movq 192(%r11), %rcx
  31776. pextq %r14, %rax, %rax
  31777. pextq %r13, %rcx, %rcx
  31778. movq %r8, 184(%r15)
  31779. adcq %rcx, %rax
  31780. movq 200(%r10), %rcx
  31781. movq 200(%r11), %r8
  31782. pextq %r14, %rcx, %rcx
  31783. pextq %r13, %r8, %r8
  31784. movq %rax, 192(%r15)
  31785. adcq %r8, %rcx
  31786. movq 208(%r10), %r8
  31787. movq 208(%r11), %rax
  31788. pextq %r14, %r8, %r8
  31789. pextq %r13, %rax, %rax
  31790. movq %rcx, 200(%r15)
  31791. adcq %rax, %r8
  31792. movq 216(%r10), %rax
  31793. movq 216(%r11), %rcx
  31794. pextq %r14, %rax, %rax
  31795. pextq %r13, %rcx, %rcx
  31796. movq %r8, 208(%r15)
  31797. adcq %rcx, %rax
  31798. movq 224(%r10), %rcx
  31799. movq 224(%r11), %r8
  31800. pextq %r14, %rcx, %rcx
  31801. pextq %r13, %r8, %r8
  31802. movq %rax, 216(%r15)
  31803. adcq %r8, %rcx
  31804. movq 232(%r10), %r8
  31805. movq 232(%r11), %rax
  31806. pextq %r14, %r8, %r8
  31807. pextq %r13, %rax, %rax
  31808. movq %rcx, 224(%r15)
  31809. adcq %rax, %r8
  31810. movq 240(%r10), %rax
  31811. movq 240(%r11), %rcx
  31812. pextq %r14, %rax, %rax
  31813. pextq %r13, %rcx, %rcx
  31814. movq %r8, 232(%r15)
  31815. adcq %rcx, %rax
  31816. movq 248(%r10), %rcx
  31817. movq 248(%r11), %r8
  31818. pextq %r14, %rcx, %rcx
  31819. pextq %r13, %r8, %r8
  31820. movq %rax, 240(%r15)
  31821. adcq %r8, %rcx
  31822. movq %rcx, 248(%r15)
  31823. adcq $0x00, %r9
  31824. leaq 512(%rsp), %r11
  31825. movq %rsp, %r10
  31826. movq (%r10), %rax
  31827. subq (%r11), %rax
  31828. movq 8(%r10), %rcx
  31829. movq %rax, (%r10)
  31830. sbbq 8(%r11), %rcx
  31831. movq 16(%r10), %r8
  31832. movq %rcx, 8(%r10)
  31833. sbbq 16(%r11), %r8
  31834. movq 24(%r10), %rax
  31835. movq %r8, 16(%r10)
  31836. sbbq 24(%r11), %rax
  31837. movq 32(%r10), %rcx
  31838. movq %rax, 24(%r10)
  31839. sbbq 32(%r11), %rcx
  31840. movq 40(%r10), %r8
  31841. movq %rcx, 32(%r10)
  31842. sbbq 40(%r11), %r8
  31843. movq 48(%r10), %rax
  31844. movq %r8, 40(%r10)
  31845. sbbq 48(%r11), %rax
  31846. movq 56(%r10), %rcx
  31847. movq %rax, 48(%r10)
  31848. sbbq 56(%r11), %rcx
  31849. movq 64(%r10), %r8
  31850. movq %rcx, 56(%r10)
  31851. sbbq 64(%r11), %r8
  31852. movq 72(%r10), %rax
  31853. movq %r8, 64(%r10)
  31854. sbbq 72(%r11), %rax
  31855. movq 80(%r10), %rcx
  31856. movq %rax, 72(%r10)
  31857. sbbq 80(%r11), %rcx
  31858. movq 88(%r10), %r8
  31859. movq %rcx, 80(%r10)
  31860. sbbq 88(%r11), %r8
  31861. movq 96(%r10), %rax
  31862. movq %r8, 88(%r10)
  31863. sbbq 96(%r11), %rax
  31864. movq 104(%r10), %rcx
  31865. movq %rax, 96(%r10)
  31866. sbbq 104(%r11), %rcx
  31867. movq 112(%r10), %r8
  31868. movq %rcx, 104(%r10)
  31869. sbbq 112(%r11), %r8
  31870. movq 120(%r10), %rax
  31871. movq %r8, 112(%r10)
  31872. sbbq 120(%r11), %rax
  31873. movq 128(%r10), %rcx
  31874. movq %rax, 120(%r10)
  31875. sbbq 128(%r11), %rcx
  31876. movq 136(%r10), %r8
  31877. movq %rcx, 128(%r10)
  31878. sbbq 136(%r11), %r8
  31879. movq 144(%r10), %rax
  31880. movq %r8, 136(%r10)
  31881. sbbq 144(%r11), %rax
  31882. movq 152(%r10), %rcx
  31883. movq %rax, 144(%r10)
  31884. sbbq 152(%r11), %rcx
  31885. movq 160(%r10), %r8
  31886. movq %rcx, 152(%r10)
  31887. sbbq 160(%r11), %r8
  31888. movq 168(%r10), %rax
  31889. movq %r8, 160(%r10)
  31890. sbbq 168(%r11), %rax
  31891. movq 176(%r10), %rcx
  31892. movq %rax, 168(%r10)
  31893. sbbq 176(%r11), %rcx
  31894. movq 184(%r10), %r8
  31895. movq %rcx, 176(%r10)
  31896. sbbq 184(%r11), %r8
  31897. movq 192(%r10), %rax
  31898. movq %r8, 184(%r10)
  31899. sbbq 192(%r11), %rax
  31900. movq 200(%r10), %rcx
  31901. movq %rax, 192(%r10)
  31902. sbbq 200(%r11), %rcx
  31903. movq 208(%r10), %r8
  31904. movq %rcx, 200(%r10)
  31905. sbbq 208(%r11), %r8
  31906. movq 216(%r10), %rax
  31907. movq %r8, 208(%r10)
  31908. sbbq 216(%r11), %rax
  31909. movq 224(%r10), %rcx
  31910. movq %rax, 216(%r10)
  31911. sbbq 224(%r11), %rcx
  31912. movq 232(%r10), %r8
  31913. movq %rcx, 224(%r10)
  31914. sbbq 232(%r11), %r8
  31915. movq 240(%r10), %rax
  31916. movq %r8, 232(%r10)
  31917. sbbq 240(%r11), %rax
  31918. movq 248(%r10), %rcx
  31919. movq %rax, 240(%r10)
  31920. sbbq 248(%r11), %rcx
  31921. movq 256(%r10), %r8
  31922. movq %rcx, 248(%r10)
  31923. sbbq 256(%r11), %r8
  31924. movq 264(%r10), %rax
  31925. movq %r8, 256(%r10)
  31926. sbbq 264(%r11), %rax
  31927. movq 272(%r10), %rcx
  31928. movq %rax, 264(%r10)
  31929. sbbq 272(%r11), %rcx
  31930. movq 280(%r10), %r8
  31931. movq %rcx, 272(%r10)
  31932. sbbq 280(%r11), %r8
  31933. movq 288(%r10), %rax
  31934. movq %r8, 280(%r10)
  31935. sbbq 288(%r11), %rax
  31936. movq 296(%r10), %rcx
  31937. movq %rax, 288(%r10)
  31938. sbbq 296(%r11), %rcx
  31939. movq 304(%r10), %r8
  31940. movq %rcx, 296(%r10)
  31941. sbbq 304(%r11), %r8
  31942. movq 312(%r10), %rax
  31943. movq %r8, 304(%r10)
  31944. sbbq 312(%r11), %rax
  31945. movq 320(%r10), %rcx
  31946. movq %rax, 312(%r10)
  31947. sbbq 320(%r11), %rcx
  31948. movq 328(%r10), %r8
  31949. movq %rcx, 320(%r10)
  31950. sbbq 328(%r11), %r8
  31951. movq 336(%r10), %rax
  31952. movq %r8, 328(%r10)
  31953. sbbq 336(%r11), %rax
  31954. movq 344(%r10), %rcx
  31955. movq %rax, 336(%r10)
  31956. sbbq 344(%r11), %rcx
  31957. movq 352(%r10), %r8
  31958. movq %rcx, 344(%r10)
  31959. sbbq 352(%r11), %r8
  31960. movq 360(%r10), %rax
  31961. movq %r8, 352(%r10)
  31962. sbbq 360(%r11), %rax
  31963. movq 368(%r10), %rcx
  31964. movq %rax, 360(%r10)
  31965. sbbq 368(%r11), %rcx
  31966. movq 376(%r10), %r8
  31967. movq %rcx, 368(%r10)
  31968. sbbq 376(%r11), %r8
  31969. movq 384(%r10), %rax
  31970. movq %r8, 376(%r10)
  31971. sbbq 384(%r11), %rax
  31972. movq 392(%r10), %rcx
  31973. movq %rax, 384(%r10)
  31974. sbbq 392(%r11), %rcx
  31975. movq 400(%r10), %r8
  31976. movq %rcx, 392(%r10)
  31977. sbbq 400(%r11), %r8
  31978. movq 408(%r10), %rax
  31979. movq %r8, 400(%r10)
  31980. sbbq 408(%r11), %rax
  31981. movq 416(%r10), %rcx
  31982. movq %rax, 408(%r10)
  31983. sbbq 416(%r11), %rcx
  31984. movq 424(%r10), %r8
  31985. movq %rcx, 416(%r10)
  31986. sbbq 424(%r11), %r8
  31987. movq 432(%r10), %rax
  31988. movq %r8, 424(%r10)
  31989. sbbq 432(%r11), %rax
  31990. movq 440(%r10), %rcx
  31991. movq %rax, 432(%r10)
  31992. sbbq 440(%r11), %rcx
  31993. movq 448(%r10), %r8
  31994. movq %rcx, 440(%r10)
  31995. sbbq 448(%r11), %r8
  31996. movq 456(%r10), %rax
  31997. movq %r8, 448(%r10)
  31998. sbbq 456(%r11), %rax
  31999. movq 464(%r10), %rcx
  32000. movq %rax, 456(%r10)
  32001. sbbq 464(%r11), %rcx
  32002. movq 472(%r10), %r8
  32003. movq %rcx, 464(%r10)
  32004. sbbq 472(%r11), %r8
  32005. movq 480(%r10), %rax
  32006. movq %r8, 472(%r10)
  32007. sbbq 480(%r11), %rax
  32008. movq 488(%r10), %rcx
  32009. movq %rax, 480(%r10)
  32010. sbbq 488(%r11), %rcx
  32011. movq 496(%r10), %r8
  32012. movq %rcx, 488(%r10)
  32013. sbbq 496(%r11), %r8
  32014. movq 504(%r10), %rax
  32015. movq %r8, 496(%r10)
  32016. sbbq 504(%r11), %rax
  32017. movq %rax, 504(%r10)
  32018. sbbq $0x00, %r9
  32019. movq (%r10), %rax
  32020. subq (%rdi), %rax
  32021. movq 8(%r10), %rcx
  32022. movq %rax, (%r10)
  32023. sbbq 8(%rdi), %rcx
  32024. movq 16(%r10), %r8
  32025. movq %rcx, 8(%r10)
  32026. sbbq 16(%rdi), %r8
  32027. movq 24(%r10), %rax
  32028. movq %r8, 16(%r10)
  32029. sbbq 24(%rdi), %rax
  32030. movq 32(%r10), %rcx
  32031. movq %rax, 24(%r10)
  32032. sbbq 32(%rdi), %rcx
  32033. movq 40(%r10), %r8
  32034. movq %rcx, 32(%r10)
  32035. sbbq 40(%rdi), %r8
  32036. movq 48(%r10), %rax
  32037. movq %r8, 40(%r10)
  32038. sbbq 48(%rdi), %rax
  32039. movq 56(%r10), %rcx
  32040. movq %rax, 48(%r10)
  32041. sbbq 56(%rdi), %rcx
  32042. movq 64(%r10), %r8
  32043. movq %rcx, 56(%r10)
  32044. sbbq 64(%rdi), %r8
  32045. movq 72(%r10), %rax
  32046. movq %r8, 64(%r10)
  32047. sbbq 72(%rdi), %rax
  32048. movq 80(%r10), %rcx
  32049. movq %rax, 72(%r10)
  32050. sbbq 80(%rdi), %rcx
  32051. movq 88(%r10), %r8
  32052. movq %rcx, 80(%r10)
  32053. sbbq 88(%rdi), %r8
  32054. movq 96(%r10), %rax
  32055. movq %r8, 88(%r10)
  32056. sbbq 96(%rdi), %rax
  32057. movq 104(%r10), %rcx
  32058. movq %rax, 96(%r10)
  32059. sbbq 104(%rdi), %rcx
  32060. movq 112(%r10), %r8
  32061. movq %rcx, 104(%r10)
  32062. sbbq 112(%rdi), %r8
  32063. movq 120(%r10), %rax
  32064. movq %r8, 112(%r10)
  32065. sbbq 120(%rdi), %rax
  32066. movq 128(%r10), %rcx
  32067. movq %rax, 120(%r10)
  32068. sbbq 128(%rdi), %rcx
  32069. movq 136(%r10), %r8
  32070. movq %rcx, 128(%r10)
  32071. sbbq 136(%rdi), %r8
  32072. movq 144(%r10), %rax
  32073. movq %r8, 136(%r10)
  32074. sbbq 144(%rdi), %rax
  32075. movq 152(%r10), %rcx
  32076. movq %rax, 144(%r10)
  32077. sbbq 152(%rdi), %rcx
  32078. movq 160(%r10), %r8
  32079. movq %rcx, 152(%r10)
  32080. sbbq 160(%rdi), %r8
  32081. movq 168(%r10), %rax
  32082. movq %r8, 160(%r10)
  32083. sbbq 168(%rdi), %rax
  32084. movq 176(%r10), %rcx
  32085. movq %rax, 168(%r10)
  32086. sbbq 176(%rdi), %rcx
  32087. movq 184(%r10), %r8
  32088. movq %rcx, 176(%r10)
  32089. sbbq 184(%rdi), %r8
  32090. movq 192(%r10), %rax
  32091. movq %r8, 184(%r10)
  32092. sbbq 192(%rdi), %rax
  32093. movq 200(%r10), %rcx
  32094. movq %rax, 192(%r10)
  32095. sbbq 200(%rdi), %rcx
  32096. movq 208(%r10), %r8
  32097. movq %rcx, 200(%r10)
  32098. sbbq 208(%rdi), %r8
  32099. movq 216(%r10), %rax
  32100. movq %r8, 208(%r10)
  32101. sbbq 216(%rdi), %rax
  32102. movq 224(%r10), %rcx
  32103. movq %rax, 216(%r10)
  32104. sbbq 224(%rdi), %rcx
  32105. movq 232(%r10), %r8
  32106. movq %rcx, 224(%r10)
  32107. sbbq 232(%rdi), %r8
  32108. movq 240(%r10), %rax
  32109. movq %r8, 232(%r10)
  32110. sbbq 240(%rdi), %rax
  32111. movq 248(%r10), %rcx
  32112. movq %rax, 240(%r10)
  32113. sbbq 248(%rdi), %rcx
  32114. movq 256(%r10), %r8
  32115. movq %rcx, 248(%r10)
  32116. sbbq 256(%rdi), %r8
  32117. movq 264(%r10), %rax
  32118. movq %r8, 256(%r10)
  32119. sbbq 264(%rdi), %rax
  32120. movq 272(%r10), %rcx
  32121. movq %rax, 264(%r10)
  32122. sbbq 272(%rdi), %rcx
  32123. movq 280(%r10), %r8
  32124. movq %rcx, 272(%r10)
  32125. sbbq 280(%rdi), %r8
  32126. movq 288(%r10), %rax
  32127. movq %r8, 280(%r10)
  32128. sbbq 288(%rdi), %rax
  32129. movq 296(%r10), %rcx
  32130. movq %rax, 288(%r10)
  32131. sbbq 296(%rdi), %rcx
  32132. movq 304(%r10), %r8
  32133. movq %rcx, 296(%r10)
  32134. sbbq 304(%rdi), %r8
  32135. movq 312(%r10), %rax
  32136. movq %r8, 304(%r10)
  32137. sbbq 312(%rdi), %rax
  32138. movq 320(%r10), %rcx
  32139. movq %rax, 312(%r10)
  32140. sbbq 320(%rdi), %rcx
  32141. movq 328(%r10), %r8
  32142. movq %rcx, 320(%r10)
  32143. sbbq 328(%rdi), %r8
  32144. movq 336(%r10), %rax
  32145. movq %r8, 328(%r10)
  32146. sbbq 336(%rdi), %rax
  32147. movq 344(%r10), %rcx
  32148. movq %rax, 336(%r10)
  32149. sbbq 344(%rdi), %rcx
  32150. movq 352(%r10), %r8
  32151. movq %rcx, 344(%r10)
  32152. sbbq 352(%rdi), %r8
  32153. movq 360(%r10), %rax
  32154. movq %r8, 352(%r10)
  32155. sbbq 360(%rdi), %rax
  32156. movq 368(%r10), %rcx
  32157. movq %rax, 360(%r10)
  32158. sbbq 368(%rdi), %rcx
  32159. movq 376(%r10), %r8
  32160. movq %rcx, 368(%r10)
  32161. sbbq 376(%rdi), %r8
  32162. movq 384(%r10), %rax
  32163. movq %r8, 376(%r10)
  32164. sbbq 384(%rdi), %rax
  32165. movq 392(%r10), %rcx
  32166. movq %rax, 384(%r10)
  32167. sbbq 392(%rdi), %rcx
  32168. movq 400(%r10), %r8
  32169. movq %rcx, 392(%r10)
  32170. sbbq 400(%rdi), %r8
  32171. movq 408(%r10), %rax
  32172. movq %r8, 400(%r10)
  32173. sbbq 408(%rdi), %rax
  32174. movq 416(%r10), %rcx
  32175. movq %rax, 408(%r10)
  32176. sbbq 416(%rdi), %rcx
  32177. movq 424(%r10), %r8
  32178. movq %rcx, 416(%r10)
  32179. sbbq 424(%rdi), %r8
  32180. movq 432(%r10), %rax
  32181. movq %r8, 424(%r10)
  32182. sbbq 432(%rdi), %rax
  32183. movq 440(%r10), %rcx
  32184. movq %rax, 432(%r10)
  32185. sbbq 440(%rdi), %rcx
  32186. movq 448(%r10), %r8
  32187. movq %rcx, 440(%r10)
  32188. sbbq 448(%rdi), %r8
  32189. movq 456(%r10), %rax
  32190. movq %r8, 448(%r10)
  32191. sbbq 456(%rdi), %rax
  32192. movq 464(%r10), %rcx
  32193. movq %rax, 456(%r10)
  32194. sbbq 464(%rdi), %rcx
  32195. movq 472(%r10), %r8
  32196. movq %rcx, 464(%r10)
  32197. sbbq 472(%rdi), %r8
  32198. movq 480(%r10), %rax
  32199. movq %r8, 472(%r10)
  32200. sbbq 480(%rdi), %rax
  32201. movq 488(%r10), %rcx
  32202. movq %rax, 480(%r10)
  32203. sbbq 488(%rdi), %rcx
  32204. movq 496(%r10), %r8
  32205. movq %rcx, 488(%r10)
  32206. sbbq 496(%rdi), %r8
  32207. movq 504(%r10), %rax
  32208. movq %r8, 496(%r10)
  32209. sbbq 504(%rdi), %rax
  32210. movq %rax, 504(%r10)
  32211. sbbq $0x00, %r9
  32212. subq $0x100, %r15
  32213. # Add
  32214. movq (%r15), %rax
  32215. addq (%r10), %rax
  32216. movq 8(%r15), %rcx
  32217. movq %rax, (%r15)
  32218. adcq 8(%r10), %rcx
  32219. movq 16(%r15), %r8
  32220. movq %rcx, 8(%r15)
  32221. adcq 16(%r10), %r8
  32222. movq 24(%r15), %rax
  32223. movq %r8, 16(%r15)
  32224. adcq 24(%r10), %rax
  32225. movq 32(%r15), %rcx
  32226. movq %rax, 24(%r15)
  32227. adcq 32(%r10), %rcx
  32228. movq 40(%r15), %r8
  32229. movq %rcx, 32(%r15)
  32230. adcq 40(%r10), %r8
  32231. movq 48(%r15), %rax
  32232. movq %r8, 40(%r15)
  32233. adcq 48(%r10), %rax
  32234. movq 56(%r15), %rcx
  32235. movq %rax, 48(%r15)
  32236. adcq 56(%r10), %rcx
  32237. movq 64(%r15), %r8
  32238. movq %rcx, 56(%r15)
  32239. adcq 64(%r10), %r8
  32240. movq 72(%r15), %rax
  32241. movq %r8, 64(%r15)
  32242. adcq 72(%r10), %rax
  32243. movq 80(%r15), %rcx
  32244. movq %rax, 72(%r15)
  32245. adcq 80(%r10), %rcx
  32246. movq 88(%r15), %r8
  32247. movq %rcx, 80(%r15)
  32248. adcq 88(%r10), %r8
  32249. movq 96(%r15), %rax
  32250. movq %r8, 88(%r15)
  32251. adcq 96(%r10), %rax
  32252. movq 104(%r15), %rcx
  32253. movq %rax, 96(%r15)
  32254. adcq 104(%r10), %rcx
  32255. movq 112(%r15), %r8
  32256. movq %rcx, 104(%r15)
  32257. adcq 112(%r10), %r8
  32258. movq 120(%r15), %rax
  32259. movq %r8, 112(%r15)
  32260. adcq 120(%r10), %rax
  32261. movq 128(%r15), %rcx
  32262. movq %rax, 120(%r15)
  32263. adcq 128(%r10), %rcx
  32264. movq 136(%r15), %r8
  32265. movq %rcx, 128(%r15)
  32266. adcq 136(%r10), %r8
  32267. movq 144(%r15), %rax
  32268. movq %r8, 136(%r15)
  32269. adcq 144(%r10), %rax
  32270. movq 152(%r15), %rcx
  32271. movq %rax, 144(%r15)
  32272. adcq 152(%r10), %rcx
  32273. movq 160(%r15), %r8
  32274. movq %rcx, 152(%r15)
  32275. adcq 160(%r10), %r8
  32276. movq 168(%r15), %rax
  32277. movq %r8, 160(%r15)
  32278. adcq 168(%r10), %rax
  32279. movq 176(%r15), %rcx
  32280. movq %rax, 168(%r15)
  32281. adcq 176(%r10), %rcx
  32282. movq 184(%r15), %r8
  32283. movq %rcx, 176(%r15)
  32284. adcq 184(%r10), %r8
  32285. movq 192(%r15), %rax
  32286. movq %r8, 184(%r15)
  32287. adcq 192(%r10), %rax
  32288. movq 200(%r15), %rcx
  32289. movq %rax, 192(%r15)
  32290. adcq 200(%r10), %rcx
  32291. movq 208(%r15), %r8
  32292. movq %rcx, 200(%r15)
  32293. adcq 208(%r10), %r8
  32294. movq 216(%r15), %rax
  32295. movq %r8, 208(%r15)
  32296. adcq 216(%r10), %rax
  32297. movq 224(%r15), %rcx
  32298. movq %rax, 216(%r15)
  32299. adcq 224(%r10), %rcx
  32300. movq 232(%r15), %r8
  32301. movq %rcx, 224(%r15)
  32302. adcq 232(%r10), %r8
  32303. movq 240(%r15), %rax
  32304. movq %r8, 232(%r15)
  32305. adcq 240(%r10), %rax
  32306. movq 248(%r15), %rcx
  32307. movq %rax, 240(%r15)
  32308. adcq 248(%r10), %rcx
  32309. movq 256(%r15), %r8
  32310. movq %rcx, 248(%r15)
  32311. adcq 256(%r10), %r8
  32312. movq 264(%r15), %rax
  32313. movq %r8, 256(%r15)
  32314. adcq 264(%r10), %rax
  32315. movq 272(%r15), %rcx
  32316. movq %rax, 264(%r15)
  32317. adcq 272(%r10), %rcx
  32318. movq 280(%r15), %r8
  32319. movq %rcx, 272(%r15)
  32320. adcq 280(%r10), %r8
  32321. movq 288(%r15), %rax
  32322. movq %r8, 280(%r15)
  32323. adcq 288(%r10), %rax
  32324. movq 296(%r15), %rcx
  32325. movq %rax, 288(%r15)
  32326. adcq 296(%r10), %rcx
  32327. movq 304(%r15), %r8
  32328. movq %rcx, 296(%r15)
  32329. adcq 304(%r10), %r8
  32330. movq 312(%r15), %rax
  32331. movq %r8, 304(%r15)
  32332. adcq 312(%r10), %rax
  32333. movq 320(%r15), %rcx
  32334. movq %rax, 312(%r15)
  32335. adcq 320(%r10), %rcx
  32336. movq 328(%r15), %r8
  32337. movq %rcx, 320(%r15)
  32338. adcq 328(%r10), %r8
  32339. movq 336(%r15), %rax
  32340. movq %r8, 328(%r15)
  32341. adcq 336(%r10), %rax
  32342. movq 344(%r15), %rcx
  32343. movq %rax, 336(%r15)
  32344. adcq 344(%r10), %rcx
  32345. movq 352(%r15), %r8
  32346. movq %rcx, 344(%r15)
  32347. adcq 352(%r10), %r8
  32348. movq 360(%r15), %rax
  32349. movq %r8, 352(%r15)
  32350. adcq 360(%r10), %rax
  32351. movq 368(%r15), %rcx
  32352. movq %rax, 360(%r15)
  32353. adcq 368(%r10), %rcx
  32354. movq 376(%r15), %r8
  32355. movq %rcx, 368(%r15)
  32356. adcq 376(%r10), %r8
  32357. movq 384(%r15), %rax
  32358. movq %r8, 376(%r15)
  32359. adcq 384(%r10), %rax
  32360. movq 392(%r15), %rcx
  32361. movq %rax, 384(%r15)
  32362. adcq 392(%r10), %rcx
  32363. movq 400(%r15), %r8
  32364. movq %rcx, 392(%r15)
  32365. adcq 400(%r10), %r8
  32366. movq 408(%r15), %rax
  32367. movq %r8, 400(%r15)
  32368. adcq 408(%r10), %rax
  32369. movq 416(%r15), %rcx
  32370. movq %rax, 408(%r15)
  32371. adcq 416(%r10), %rcx
  32372. movq 424(%r15), %r8
  32373. movq %rcx, 416(%r15)
  32374. adcq 424(%r10), %r8
  32375. movq 432(%r15), %rax
  32376. movq %r8, 424(%r15)
  32377. adcq 432(%r10), %rax
  32378. movq 440(%r15), %rcx
  32379. movq %rax, 432(%r15)
  32380. adcq 440(%r10), %rcx
  32381. movq 448(%r15), %r8
  32382. movq %rcx, 440(%r15)
  32383. adcq 448(%r10), %r8
  32384. movq 456(%r15), %rax
  32385. movq %r8, 448(%r15)
  32386. adcq 456(%r10), %rax
  32387. movq 464(%r15), %rcx
  32388. movq %rax, 456(%r15)
  32389. adcq 464(%r10), %rcx
  32390. movq 472(%r15), %r8
  32391. movq %rcx, 464(%r15)
  32392. adcq 472(%r10), %r8
  32393. movq 480(%r15), %rax
  32394. movq %r8, 472(%r15)
  32395. adcq 480(%r10), %rax
  32396. movq 488(%r15), %rcx
  32397. movq %rax, 480(%r15)
  32398. adcq 488(%r10), %rcx
  32399. movq 496(%r15), %r8
  32400. movq %rcx, 488(%r15)
  32401. adcq 496(%r10), %r8
  32402. movq 504(%r15), %rax
  32403. movq %r8, 496(%r15)
  32404. adcq 504(%r10), %rax
  32405. movq %rax, 504(%r15)
  32406. adcq $0x00, %r9
  32407. movq %r9, 768(%rdi)
  32408. addq $0x100, %r15
  32409. # Add
  32410. movq (%r15), %rax
  32411. xorq %r9, %r9
  32412. addq (%r11), %rax
  32413. movq 8(%r15), %rcx
  32414. movq %rax, (%r15)
  32415. adcq 8(%r11), %rcx
  32416. movq 16(%r15), %r8
  32417. movq %rcx, 8(%r15)
  32418. adcq 16(%r11), %r8
  32419. movq 24(%r15), %rax
  32420. movq %r8, 16(%r15)
  32421. adcq 24(%r11), %rax
  32422. movq 32(%r15), %rcx
  32423. movq %rax, 24(%r15)
  32424. adcq 32(%r11), %rcx
  32425. movq 40(%r15), %r8
  32426. movq %rcx, 32(%r15)
  32427. adcq 40(%r11), %r8
  32428. movq 48(%r15), %rax
  32429. movq %r8, 40(%r15)
  32430. adcq 48(%r11), %rax
  32431. movq 56(%r15), %rcx
  32432. movq %rax, 48(%r15)
  32433. adcq 56(%r11), %rcx
  32434. movq 64(%r15), %r8
  32435. movq %rcx, 56(%r15)
  32436. adcq 64(%r11), %r8
  32437. movq 72(%r15), %rax
  32438. movq %r8, 64(%r15)
  32439. adcq 72(%r11), %rax
  32440. movq 80(%r15), %rcx
  32441. movq %rax, 72(%r15)
  32442. adcq 80(%r11), %rcx
  32443. movq 88(%r15), %r8
  32444. movq %rcx, 80(%r15)
  32445. adcq 88(%r11), %r8
  32446. movq 96(%r15), %rax
  32447. movq %r8, 88(%r15)
  32448. adcq 96(%r11), %rax
  32449. movq 104(%r15), %rcx
  32450. movq %rax, 96(%r15)
  32451. adcq 104(%r11), %rcx
  32452. movq 112(%r15), %r8
  32453. movq %rcx, 104(%r15)
  32454. adcq 112(%r11), %r8
  32455. movq 120(%r15), %rax
  32456. movq %r8, 112(%r15)
  32457. adcq 120(%r11), %rax
  32458. movq 128(%r15), %rcx
  32459. movq %rax, 120(%r15)
  32460. adcq 128(%r11), %rcx
  32461. movq 136(%r15), %r8
  32462. movq %rcx, 128(%r15)
  32463. adcq 136(%r11), %r8
  32464. movq 144(%r15), %rax
  32465. movq %r8, 136(%r15)
  32466. adcq 144(%r11), %rax
  32467. movq 152(%r15), %rcx
  32468. movq %rax, 144(%r15)
  32469. adcq 152(%r11), %rcx
  32470. movq 160(%r15), %r8
  32471. movq %rcx, 152(%r15)
  32472. adcq 160(%r11), %r8
  32473. movq 168(%r15), %rax
  32474. movq %r8, 160(%r15)
  32475. adcq 168(%r11), %rax
  32476. movq 176(%r15), %rcx
  32477. movq %rax, 168(%r15)
  32478. adcq 176(%r11), %rcx
  32479. movq 184(%r15), %r8
  32480. movq %rcx, 176(%r15)
  32481. adcq 184(%r11), %r8
  32482. movq 192(%r15), %rax
  32483. movq %r8, 184(%r15)
  32484. adcq 192(%r11), %rax
  32485. movq 200(%r15), %rcx
  32486. movq %rax, 192(%r15)
  32487. adcq 200(%r11), %rcx
  32488. movq 208(%r15), %r8
  32489. movq %rcx, 200(%r15)
  32490. adcq 208(%r11), %r8
  32491. movq 216(%r15), %rax
  32492. movq %r8, 208(%r15)
  32493. adcq 216(%r11), %rax
  32494. movq 224(%r15), %rcx
  32495. movq %rax, 216(%r15)
  32496. adcq 224(%r11), %rcx
  32497. movq 232(%r15), %r8
  32498. movq %rcx, 224(%r15)
  32499. adcq 232(%r11), %r8
  32500. movq 240(%r15), %rax
  32501. movq %r8, 232(%r15)
  32502. adcq 240(%r11), %rax
  32503. movq 248(%r15), %rcx
  32504. movq %rax, 240(%r15)
  32505. adcq 248(%r11), %rcx
  32506. movq 256(%r15), %r8
  32507. movq %rcx, 248(%r15)
  32508. adcq 256(%r11), %r8
  32509. movq %r8, 256(%r15)
  32510. adcq $0x00, %r9
  32511. # Add to zero
  32512. movq 264(%r11), %rax
  32513. adcq $0x00, %rax
  32514. movq 272(%r11), %rcx
  32515. movq %rax, 264(%r15)
  32516. adcq $0x00, %rcx
  32517. movq 280(%r11), %r8
  32518. movq %rcx, 272(%r15)
  32519. adcq $0x00, %r8
  32520. movq 288(%r11), %rax
  32521. movq %r8, 280(%r15)
  32522. adcq $0x00, %rax
  32523. movq 296(%r11), %rcx
  32524. movq %rax, 288(%r15)
  32525. adcq $0x00, %rcx
  32526. movq 304(%r11), %r8
  32527. movq %rcx, 296(%r15)
  32528. adcq $0x00, %r8
  32529. movq 312(%r11), %rax
  32530. movq %r8, 304(%r15)
  32531. adcq $0x00, %rax
  32532. movq 320(%r11), %rcx
  32533. movq %rax, 312(%r15)
  32534. adcq $0x00, %rcx
  32535. movq 328(%r11), %r8
  32536. movq %rcx, 320(%r15)
  32537. adcq $0x00, %r8
  32538. movq 336(%r11), %rax
  32539. movq %r8, 328(%r15)
  32540. adcq $0x00, %rax
  32541. movq 344(%r11), %rcx
  32542. movq %rax, 336(%r15)
  32543. adcq $0x00, %rcx
  32544. movq 352(%r11), %r8
  32545. movq %rcx, 344(%r15)
  32546. adcq $0x00, %r8
  32547. movq 360(%r11), %rax
  32548. movq %r8, 352(%r15)
  32549. adcq $0x00, %rax
  32550. movq 368(%r11), %rcx
  32551. movq %rax, 360(%r15)
  32552. adcq $0x00, %rcx
  32553. movq 376(%r11), %r8
  32554. movq %rcx, 368(%r15)
  32555. adcq $0x00, %r8
  32556. movq 384(%r11), %rax
  32557. movq %r8, 376(%r15)
  32558. adcq $0x00, %rax
  32559. movq 392(%r11), %rcx
  32560. movq %rax, 384(%r15)
  32561. adcq $0x00, %rcx
  32562. movq 400(%r11), %r8
  32563. movq %rcx, 392(%r15)
  32564. adcq $0x00, %r8
  32565. movq 408(%r11), %rax
  32566. movq %r8, 400(%r15)
  32567. adcq $0x00, %rax
  32568. movq 416(%r11), %rcx
  32569. movq %rax, 408(%r15)
  32570. adcq $0x00, %rcx
  32571. movq 424(%r11), %r8
  32572. movq %rcx, 416(%r15)
  32573. adcq $0x00, %r8
  32574. movq 432(%r11), %rax
  32575. movq %r8, 424(%r15)
  32576. adcq $0x00, %rax
  32577. movq 440(%r11), %rcx
  32578. movq %rax, 432(%r15)
  32579. adcq $0x00, %rcx
  32580. movq 448(%r11), %r8
  32581. movq %rcx, 440(%r15)
  32582. adcq $0x00, %r8
  32583. movq 456(%r11), %rax
  32584. movq %r8, 448(%r15)
  32585. adcq $0x00, %rax
  32586. movq 464(%r11), %rcx
  32587. movq %rax, 456(%r15)
  32588. adcq $0x00, %rcx
  32589. movq 472(%r11), %r8
  32590. movq %rcx, 464(%r15)
  32591. adcq $0x00, %r8
  32592. movq 480(%r11), %rax
  32593. movq %r8, 472(%r15)
  32594. adcq $0x00, %rax
  32595. movq 488(%r11), %rcx
  32596. movq %rax, 480(%r15)
  32597. adcq $0x00, %rcx
  32598. movq 496(%r11), %r8
  32599. movq %rcx, 488(%r15)
  32600. adcq $0x00, %r8
  32601. movq 504(%r11), %rax
  32602. movq %r8, 496(%r15)
  32603. adcq $0x00, %rax
  32604. movq %rax, 504(%r15)
  32605. addq $0x628, %rsp
  32606. popq %r15
  32607. popq %r14
  32608. popq %r13
  32609. popq %r12
  32610. repz retq
  32611. #ifndef __APPLE__
  32612. .size sp_4096_mul_avx2_64,.-sp_4096_mul_avx2_64
  32613. #endif /* __APPLE__ */
  32614. /* Square a and put result in r. (r = a * a)
  32615. *
  32616. * r Result of the squaring. A single precision integer.
  32617. * a Value to square. A single precision integer.
  32618. */
  32619. #ifndef __APPLE__
  32620. .text
  32621. .globl sp_4096_sqr_avx2_64
  32622. .type sp_4096_sqr_avx2_64,@function
  32623. .align 16
  32624. sp_4096_sqr_avx2_64:
  32625. #else
  32626. .section __TEXT,__text
  32627. .globl _sp_4096_sqr_avx2_64
  32628. .p2align 4
  32629. _sp_4096_sqr_avx2_64:
  32630. #endif /* __APPLE__ */
  32631. subq $0x518, %rsp
  32632. movq %rdi, 1280(%rsp)
  32633. movq %rsi, 1288(%rsp)
  32634. leaq 1024(%rsp), %r8
  32635. leaq 256(%rsi), %r9
  32636. # Add
  32637. movq (%rsi), %rdx
  32638. xorq %rcx, %rcx
  32639. addq (%r9), %rdx
  32640. movq 8(%rsi), %rax
  32641. movq %rdx, (%r8)
  32642. adcq 8(%r9), %rax
  32643. movq 16(%rsi), %rdx
  32644. movq %rax, 8(%r8)
  32645. adcq 16(%r9), %rdx
  32646. movq 24(%rsi), %rax
  32647. movq %rdx, 16(%r8)
  32648. adcq 24(%r9), %rax
  32649. movq 32(%rsi), %rdx
  32650. movq %rax, 24(%r8)
  32651. adcq 32(%r9), %rdx
  32652. movq 40(%rsi), %rax
  32653. movq %rdx, 32(%r8)
  32654. adcq 40(%r9), %rax
  32655. movq 48(%rsi), %rdx
  32656. movq %rax, 40(%r8)
  32657. adcq 48(%r9), %rdx
  32658. movq 56(%rsi), %rax
  32659. movq %rdx, 48(%r8)
  32660. adcq 56(%r9), %rax
  32661. movq 64(%rsi), %rdx
  32662. movq %rax, 56(%r8)
  32663. adcq 64(%r9), %rdx
  32664. movq 72(%rsi), %rax
  32665. movq %rdx, 64(%r8)
  32666. adcq 72(%r9), %rax
  32667. movq 80(%rsi), %rdx
  32668. movq %rax, 72(%r8)
  32669. adcq 80(%r9), %rdx
  32670. movq 88(%rsi), %rax
  32671. movq %rdx, 80(%r8)
  32672. adcq 88(%r9), %rax
  32673. movq 96(%rsi), %rdx
  32674. movq %rax, 88(%r8)
  32675. adcq 96(%r9), %rdx
  32676. movq 104(%rsi), %rax
  32677. movq %rdx, 96(%r8)
  32678. adcq 104(%r9), %rax
  32679. movq 112(%rsi), %rdx
  32680. movq %rax, 104(%r8)
  32681. adcq 112(%r9), %rdx
  32682. movq 120(%rsi), %rax
  32683. movq %rdx, 112(%r8)
  32684. adcq 120(%r9), %rax
  32685. movq 128(%rsi), %rdx
  32686. movq %rax, 120(%r8)
  32687. adcq 128(%r9), %rdx
  32688. movq 136(%rsi), %rax
  32689. movq %rdx, 128(%r8)
  32690. adcq 136(%r9), %rax
  32691. movq 144(%rsi), %rdx
  32692. movq %rax, 136(%r8)
  32693. adcq 144(%r9), %rdx
  32694. movq 152(%rsi), %rax
  32695. movq %rdx, 144(%r8)
  32696. adcq 152(%r9), %rax
  32697. movq 160(%rsi), %rdx
  32698. movq %rax, 152(%r8)
  32699. adcq 160(%r9), %rdx
  32700. movq 168(%rsi), %rax
  32701. movq %rdx, 160(%r8)
  32702. adcq 168(%r9), %rax
  32703. movq 176(%rsi), %rdx
  32704. movq %rax, 168(%r8)
  32705. adcq 176(%r9), %rdx
  32706. movq 184(%rsi), %rax
  32707. movq %rdx, 176(%r8)
  32708. adcq 184(%r9), %rax
  32709. movq 192(%rsi), %rdx
  32710. movq %rax, 184(%r8)
  32711. adcq 192(%r9), %rdx
  32712. movq 200(%rsi), %rax
  32713. movq %rdx, 192(%r8)
  32714. adcq 200(%r9), %rax
  32715. movq 208(%rsi), %rdx
  32716. movq %rax, 200(%r8)
  32717. adcq 208(%r9), %rdx
  32718. movq 216(%rsi), %rax
  32719. movq %rdx, 208(%r8)
  32720. adcq 216(%r9), %rax
  32721. movq 224(%rsi), %rdx
  32722. movq %rax, 216(%r8)
  32723. adcq 224(%r9), %rdx
  32724. movq 232(%rsi), %rax
  32725. movq %rdx, 224(%r8)
  32726. adcq 232(%r9), %rax
  32727. movq 240(%rsi), %rdx
  32728. movq %rax, 232(%r8)
  32729. adcq 240(%r9), %rdx
  32730. movq 248(%rsi), %rax
  32731. movq %rdx, 240(%r8)
  32732. adcq 248(%r9), %rax
  32733. movq %rax, 248(%r8)
  32734. adcq $0x00, %rcx
  32735. movq %rcx, 1296(%rsp)
  32736. movq %r8, %rsi
  32737. movq %rsp, %rdi
  32738. #ifndef __APPLE__
  32739. callq sp_2048_sqr_avx2_32@plt
  32740. #else
  32741. callq _sp_2048_sqr_avx2_32
  32742. #endif /* __APPLE__ */
  32743. movq 1288(%rsp), %rsi
  32744. leaq 512(%rsp), %rdi
  32745. addq $0x100, %rsi
  32746. #ifndef __APPLE__
  32747. callq sp_2048_sqr_avx2_32@plt
  32748. #else
  32749. callq _sp_2048_sqr_avx2_32
  32750. #endif /* __APPLE__ */
  32751. movq 1288(%rsp), %rsi
  32752. movq 1280(%rsp), %rdi
  32753. #ifndef __APPLE__
  32754. callq sp_2048_sqr_avx2_32@plt
  32755. #else
  32756. callq _sp_2048_sqr_avx2_32
  32757. #endif /* __APPLE__ */
  32758. movq 1296(%rsp), %r10
  32759. leaq 1024(%rsp), %r8
  32760. movq %r10, %rcx
  32761. negq %r10
  32762. movq (%r8), %rdx
  32763. pextq %r10, %rdx, %rdx
  32764. addq %rdx, %rdx
  32765. movq 8(%r8), %rax
  32766. movq %rdx, 512(%rdi)
  32767. pextq %r10, %rax, %rax
  32768. adcq %rax, %rax
  32769. movq 16(%r8), %rdx
  32770. movq %rax, 520(%rdi)
  32771. pextq %r10, %rdx, %rdx
  32772. adcq %rdx, %rdx
  32773. movq 24(%r8), %rax
  32774. movq %rdx, 528(%rdi)
  32775. pextq %r10, %rax, %rax
  32776. adcq %rax, %rax
  32777. movq 32(%r8), %rdx
  32778. movq %rax, 536(%rdi)
  32779. pextq %r10, %rdx, %rdx
  32780. adcq %rdx, %rdx
  32781. movq 40(%r8), %rax
  32782. movq %rdx, 544(%rdi)
  32783. pextq %r10, %rax, %rax
  32784. adcq %rax, %rax
  32785. movq 48(%r8), %rdx
  32786. movq %rax, 552(%rdi)
  32787. pextq %r10, %rdx, %rdx
  32788. adcq %rdx, %rdx
  32789. movq 56(%r8), %rax
  32790. movq %rdx, 560(%rdi)
  32791. pextq %r10, %rax, %rax
  32792. adcq %rax, %rax
  32793. movq 64(%r8), %rdx
  32794. movq %rax, 568(%rdi)
  32795. pextq %r10, %rdx, %rdx
  32796. adcq %rdx, %rdx
  32797. movq 72(%r8), %rax
  32798. movq %rdx, 576(%rdi)
  32799. pextq %r10, %rax, %rax
  32800. adcq %rax, %rax
  32801. movq 80(%r8), %rdx
  32802. movq %rax, 584(%rdi)
  32803. pextq %r10, %rdx, %rdx
  32804. adcq %rdx, %rdx
  32805. movq 88(%r8), %rax
  32806. movq %rdx, 592(%rdi)
  32807. pextq %r10, %rax, %rax
  32808. adcq %rax, %rax
  32809. movq 96(%r8), %rdx
  32810. movq %rax, 600(%rdi)
  32811. pextq %r10, %rdx, %rdx
  32812. adcq %rdx, %rdx
  32813. movq 104(%r8), %rax
  32814. movq %rdx, 608(%rdi)
  32815. pextq %r10, %rax, %rax
  32816. adcq %rax, %rax
  32817. movq 112(%r8), %rdx
  32818. movq %rax, 616(%rdi)
  32819. pextq %r10, %rdx, %rdx
  32820. adcq %rdx, %rdx
  32821. movq 120(%r8), %rax
  32822. movq %rdx, 624(%rdi)
  32823. pextq %r10, %rax, %rax
  32824. adcq %rax, %rax
  32825. movq 128(%r8), %rdx
  32826. movq %rax, 632(%rdi)
  32827. pextq %r10, %rdx, %rdx
  32828. adcq %rdx, %rdx
  32829. movq 136(%r8), %rax
  32830. movq %rdx, 640(%rdi)
  32831. pextq %r10, %rax, %rax
  32832. adcq %rax, %rax
  32833. movq 144(%r8), %rdx
  32834. movq %rax, 648(%rdi)
  32835. pextq %r10, %rdx, %rdx
  32836. adcq %rdx, %rdx
  32837. movq 152(%r8), %rax
  32838. movq %rdx, 656(%rdi)
  32839. pextq %r10, %rax, %rax
  32840. adcq %rax, %rax
  32841. movq 160(%r8), %rdx
  32842. movq %rax, 664(%rdi)
  32843. pextq %r10, %rdx, %rdx
  32844. adcq %rdx, %rdx
  32845. movq 168(%r8), %rax
  32846. movq %rdx, 672(%rdi)
  32847. pextq %r10, %rax, %rax
  32848. adcq %rax, %rax
  32849. movq 176(%r8), %rdx
  32850. movq %rax, 680(%rdi)
  32851. pextq %r10, %rdx, %rdx
  32852. adcq %rdx, %rdx
  32853. movq 184(%r8), %rax
  32854. movq %rdx, 688(%rdi)
  32855. pextq %r10, %rax, %rax
  32856. adcq %rax, %rax
  32857. movq 192(%r8), %rdx
  32858. movq %rax, 696(%rdi)
  32859. pextq %r10, %rdx, %rdx
  32860. adcq %rdx, %rdx
  32861. movq 200(%r8), %rax
  32862. movq %rdx, 704(%rdi)
  32863. pextq %r10, %rax, %rax
  32864. adcq %rax, %rax
  32865. movq 208(%r8), %rdx
  32866. movq %rax, 712(%rdi)
  32867. pextq %r10, %rdx, %rdx
  32868. adcq %rdx, %rdx
  32869. movq 216(%r8), %rax
  32870. movq %rdx, 720(%rdi)
  32871. pextq %r10, %rax, %rax
  32872. adcq %rax, %rax
  32873. movq 224(%r8), %rdx
  32874. movq %rax, 728(%rdi)
  32875. pextq %r10, %rdx, %rdx
  32876. adcq %rdx, %rdx
  32877. movq 232(%r8), %rax
  32878. movq %rdx, 736(%rdi)
  32879. pextq %r10, %rax, %rax
  32880. adcq %rax, %rax
  32881. movq 240(%r8), %rdx
  32882. movq %rax, 744(%rdi)
  32883. pextq %r10, %rdx, %rdx
  32884. adcq %rdx, %rdx
  32885. movq 248(%r8), %rax
  32886. movq %rdx, 752(%rdi)
  32887. pextq %r10, %rax, %rax
  32888. adcq %rax, %rax
  32889. movq %rax, 760(%rdi)
  32890. adcq $0x00, %rcx
  32891. leaq 512(%rsp), %rsi
  32892. movq %rsp, %r8
  32893. movq (%r8), %rdx
  32894. subq (%rsi), %rdx
  32895. movq 8(%r8), %rax
  32896. movq %rdx, (%r8)
  32897. sbbq 8(%rsi), %rax
  32898. movq 16(%r8), %rdx
  32899. movq %rax, 8(%r8)
  32900. sbbq 16(%rsi), %rdx
  32901. movq 24(%r8), %rax
  32902. movq %rdx, 16(%r8)
  32903. sbbq 24(%rsi), %rax
  32904. movq 32(%r8), %rdx
  32905. movq %rax, 24(%r8)
  32906. sbbq 32(%rsi), %rdx
  32907. movq 40(%r8), %rax
  32908. movq %rdx, 32(%r8)
  32909. sbbq 40(%rsi), %rax
  32910. movq 48(%r8), %rdx
  32911. movq %rax, 40(%r8)
  32912. sbbq 48(%rsi), %rdx
  32913. movq 56(%r8), %rax
  32914. movq %rdx, 48(%r8)
  32915. sbbq 56(%rsi), %rax
  32916. movq 64(%r8), %rdx
  32917. movq %rax, 56(%r8)
  32918. sbbq 64(%rsi), %rdx
  32919. movq 72(%r8), %rax
  32920. movq %rdx, 64(%r8)
  32921. sbbq 72(%rsi), %rax
  32922. movq 80(%r8), %rdx
  32923. movq %rax, 72(%r8)
  32924. sbbq 80(%rsi), %rdx
  32925. movq 88(%r8), %rax
  32926. movq %rdx, 80(%r8)
  32927. sbbq 88(%rsi), %rax
  32928. movq 96(%r8), %rdx
  32929. movq %rax, 88(%r8)
  32930. sbbq 96(%rsi), %rdx
  32931. movq 104(%r8), %rax
  32932. movq %rdx, 96(%r8)
  32933. sbbq 104(%rsi), %rax
  32934. movq 112(%r8), %rdx
  32935. movq %rax, 104(%r8)
  32936. sbbq 112(%rsi), %rdx
  32937. movq 120(%r8), %rax
  32938. movq %rdx, 112(%r8)
  32939. sbbq 120(%rsi), %rax
  32940. movq 128(%r8), %rdx
  32941. movq %rax, 120(%r8)
  32942. sbbq 128(%rsi), %rdx
  32943. movq 136(%r8), %rax
  32944. movq %rdx, 128(%r8)
  32945. sbbq 136(%rsi), %rax
  32946. movq 144(%r8), %rdx
  32947. movq %rax, 136(%r8)
  32948. sbbq 144(%rsi), %rdx
  32949. movq 152(%r8), %rax
  32950. movq %rdx, 144(%r8)
  32951. sbbq 152(%rsi), %rax
  32952. movq 160(%r8), %rdx
  32953. movq %rax, 152(%r8)
  32954. sbbq 160(%rsi), %rdx
  32955. movq 168(%r8), %rax
  32956. movq %rdx, 160(%r8)
  32957. sbbq 168(%rsi), %rax
  32958. movq 176(%r8), %rdx
  32959. movq %rax, 168(%r8)
  32960. sbbq 176(%rsi), %rdx
  32961. movq 184(%r8), %rax
  32962. movq %rdx, 176(%r8)
  32963. sbbq 184(%rsi), %rax
  32964. movq 192(%r8), %rdx
  32965. movq %rax, 184(%r8)
  32966. sbbq 192(%rsi), %rdx
  32967. movq 200(%r8), %rax
  32968. movq %rdx, 192(%r8)
  32969. sbbq 200(%rsi), %rax
  32970. movq 208(%r8), %rdx
  32971. movq %rax, 200(%r8)
  32972. sbbq 208(%rsi), %rdx
  32973. movq 216(%r8), %rax
  32974. movq %rdx, 208(%r8)
  32975. sbbq 216(%rsi), %rax
  32976. movq 224(%r8), %rdx
  32977. movq %rax, 216(%r8)
  32978. sbbq 224(%rsi), %rdx
  32979. movq 232(%r8), %rax
  32980. movq %rdx, 224(%r8)
  32981. sbbq 232(%rsi), %rax
  32982. movq 240(%r8), %rdx
  32983. movq %rax, 232(%r8)
  32984. sbbq 240(%rsi), %rdx
  32985. movq 248(%r8), %rax
  32986. movq %rdx, 240(%r8)
  32987. sbbq 248(%rsi), %rax
  32988. movq 256(%r8), %rdx
  32989. movq %rax, 248(%r8)
  32990. sbbq 256(%rsi), %rdx
  32991. movq 264(%r8), %rax
  32992. movq %rdx, 256(%r8)
  32993. sbbq 264(%rsi), %rax
  32994. movq 272(%r8), %rdx
  32995. movq %rax, 264(%r8)
  32996. sbbq 272(%rsi), %rdx
  32997. movq 280(%r8), %rax
  32998. movq %rdx, 272(%r8)
  32999. sbbq 280(%rsi), %rax
  33000. movq 288(%r8), %rdx
  33001. movq %rax, 280(%r8)
  33002. sbbq 288(%rsi), %rdx
  33003. movq 296(%r8), %rax
  33004. movq %rdx, 288(%r8)
  33005. sbbq 296(%rsi), %rax
  33006. movq 304(%r8), %rdx
  33007. movq %rax, 296(%r8)
  33008. sbbq 304(%rsi), %rdx
  33009. movq 312(%r8), %rax
  33010. movq %rdx, 304(%r8)
  33011. sbbq 312(%rsi), %rax
  33012. movq 320(%r8), %rdx
  33013. movq %rax, 312(%r8)
  33014. sbbq 320(%rsi), %rdx
  33015. movq 328(%r8), %rax
  33016. movq %rdx, 320(%r8)
  33017. sbbq 328(%rsi), %rax
  33018. movq 336(%r8), %rdx
  33019. movq %rax, 328(%r8)
  33020. sbbq 336(%rsi), %rdx
  33021. movq 344(%r8), %rax
  33022. movq %rdx, 336(%r8)
  33023. sbbq 344(%rsi), %rax
  33024. movq 352(%r8), %rdx
  33025. movq %rax, 344(%r8)
  33026. sbbq 352(%rsi), %rdx
  33027. movq 360(%r8), %rax
  33028. movq %rdx, 352(%r8)
  33029. sbbq 360(%rsi), %rax
  33030. movq 368(%r8), %rdx
  33031. movq %rax, 360(%r8)
  33032. sbbq 368(%rsi), %rdx
  33033. movq 376(%r8), %rax
  33034. movq %rdx, 368(%r8)
  33035. sbbq 376(%rsi), %rax
  33036. movq 384(%r8), %rdx
  33037. movq %rax, 376(%r8)
  33038. sbbq 384(%rsi), %rdx
  33039. movq 392(%r8), %rax
  33040. movq %rdx, 384(%r8)
  33041. sbbq 392(%rsi), %rax
  33042. movq 400(%r8), %rdx
  33043. movq %rax, 392(%r8)
  33044. sbbq 400(%rsi), %rdx
  33045. movq 408(%r8), %rax
  33046. movq %rdx, 400(%r8)
  33047. sbbq 408(%rsi), %rax
  33048. movq 416(%r8), %rdx
  33049. movq %rax, 408(%r8)
  33050. sbbq 416(%rsi), %rdx
  33051. movq 424(%r8), %rax
  33052. movq %rdx, 416(%r8)
  33053. sbbq 424(%rsi), %rax
  33054. movq 432(%r8), %rdx
  33055. movq %rax, 424(%r8)
  33056. sbbq 432(%rsi), %rdx
  33057. movq 440(%r8), %rax
  33058. movq %rdx, 432(%r8)
  33059. sbbq 440(%rsi), %rax
  33060. movq 448(%r8), %rdx
  33061. movq %rax, 440(%r8)
  33062. sbbq 448(%rsi), %rdx
  33063. movq 456(%r8), %rax
  33064. movq %rdx, 448(%r8)
  33065. sbbq 456(%rsi), %rax
  33066. movq 464(%r8), %rdx
  33067. movq %rax, 456(%r8)
  33068. sbbq 464(%rsi), %rdx
  33069. movq 472(%r8), %rax
  33070. movq %rdx, 464(%r8)
  33071. sbbq 472(%rsi), %rax
  33072. movq 480(%r8), %rdx
  33073. movq %rax, 472(%r8)
  33074. sbbq 480(%rsi), %rdx
  33075. movq 488(%r8), %rax
  33076. movq %rdx, 480(%r8)
  33077. sbbq 488(%rsi), %rax
  33078. movq 496(%r8), %rdx
  33079. movq %rax, 488(%r8)
  33080. sbbq 496(%rsi), %rdx
  33081. movq 504(%r8), %rax
  33082. movq %rdx, 496(%r8)
  33083. sbbq 504(%rsi), %rax
  33084. movq %rax, 504(%r8)
  33085. sbbq $0x00, %rcx
  33086. movq (%r8), %rdx
  33087. subq (%rdi), %rdx
  33088. movq 8(%r8), %rax
  33089. movq %rdx, (%r8)
  33090. sbbq 8(%rdi), %rax
  33091. movq 16(%r8), %rdx
  33092. movq %rax, 8(%r8)
  33093. sbbq 16(%rdi), %rdx
  33094. movq 24(%r8), %rax
  33095. movq %rdx, 16(%r8)
  33096. sbbq 24(%rdi), %rax
  33097. movq 32(%r8), %rdx
  33098. movq %rax, 24(%r8)
  33099. sbbq 32(%rdi), %rdx
  33100. movq 40(%r8), %rax
  33101. movq %rdx, 32(%r8)
  33102. sbbq 40(%rdi), %rax
  33103. movq 48(%r8), %rdx
  33104. movq %rax, 40(%r8)
  33105. sbbq 48(%rdi), %rdx
  33106. movq 56(%r8), %rax
  33107. movq %rdx, 48(%r8)
  33108. sbbq 56(%rdi), %rax
  33109. movq 64(%r8), %rdx
  33110. movq %rax, 56(%r8)
  33111. sbbq 64(%rdi), %rdx
  33112. movq 72(%r8), %rax
  33113. movq %rdx, 64(%r8)
  33114. sbbq 72(%rdi), %rax
  33115. movq 80(%r8), %rdx
  33116. movq %rax, 72(%r8)
  33117. sbbq 80(%rdi), %rdx
  33118. movq 88(%r8), %rax
  33119. movq %rdx, 80(%r8)
  33120. sbbq 88(%rdi), %rax
  33121. movq 96(%r8), %rdx
  33122. movq %rax, 88(%r8)
  33123. sbbq 96(%rdi), %rdx
  33124. movq 104(%r8), %rax
  33125. movq %rdx, 96(%r8)
  33126. sbbq 104(%rdi), %rax
  33127. movq 112(%r8), %rdx
  33128. movq %rax, 104(%r8)
  33129. sbbq 112(%rdi), %rdx
  33130. movq 120(%r8), %rax
  33131. movq %rdx, 112(%r8)
  33132. sbbq 120(%rdi), %rax
  33133. movq 128(%r8), %rdx
  33134. movq %rax, 120(%r8)
  33135. sbbq 128(%rdi), %rdx
  33136. movq 136(%r8), %rax
  33137. movq %rdx, 128(%r8)
  33138. sbbq 136(%rdi), %rax
  33139. movq 144(%r8), %rdx
  33140. movq %rax, 136(%r8)
  33141. sbbq 144(%rdi), %rdx
  33142. movq 152(%r8), %rax
  33143. movq %rdx, 144(%r8)
  33144. sbbq 152(%rdi), %rax
  33145. movq 160(%r8), %rdx
  33146. movq %rax, 152(%r8)
  33147. sbbq 160(%rdi), %rdx
  33148. movq 168(%r8), %rax
  33149. movq %rdx, 160(%r8)
  33150. sbbq 168(%rdi), %rax
  33151. movq 176(%r8), %rdx
  33152. movq %rax, 168(%r8)
  33153. sbbq 176(%rdi), %rdx
  33154. movq 184(%r8), %rax
  33155. movq %rdx, 176(%r8)
  33156. sbbq 184(%rdi), %rax
  33157. movq 192(%r8), %rdx
  33158. movq %rax, 184(%r8)
  33159. sbbq 192(%rdi), %rdx
  33160. movq 200(%r8), %rax
  33161. movq %rdx, 192(%r8)
  33162. sbbq 200(%rdi), %rax
  33163. movq 208(%r8), %rdx
  33164. movq %rax, 200(%r8)
  33165. sbbq 208(%rdi), %rdx
  33166. movq 216(%r8), %rax
  33167. movq %rdx, 208(%r8)
  33168. sbbq 216(%rdi), %rax
  33169. movq 224(%r8), %rdx
  33170. movq %rax, 216(%r8)
  33171. sbbq 224(%rdi), %rdx
  33172. movq 232(%r8), %rax
  33173. movq %rdx, 224(%r8)
  33174. sbbq 232(%rdi), %rax
  33175. movq 240(%r8), %rdx
  33176. movq %rax, 232(%r8)
  33177. sbbq 240(%rdi), %rdx
  33178. movq 248(%r8), %rax
  33179. movq %rdx, 240(%r8)
  33180. sbbq 248(%rdi), %rax
  33181. movq 256(%r8), %rdx
  33182. movq %rax, 248(%r8)
  33183. sbbq 256(%rdi), %rdx
  33184. movq 264(%r8), %rax
  33185. movq %rdx, 256(%r8)
  33186. sbbq 264(%rdi), %rax
  33187. movq 272(%r8), %rdx
  33188. movq %rax, 264(%r8)
  33189. sbbq 272(%rdi), %rdx
  33190. movq 280(%r8), %rax
  33191. movq %rdx, 272(%r8)
  33192. sbbq 280(%rdi), %rax
  33193. movq 288(%r8), %rdx
  33194. movq %rax, 280(%r8)
  33195. sbbq 288(%rdi), %rdx
  33196. movq 296(%r8), %rax
  33197. movq %rdx, 288(%r8)
  33198. sbbq 296(%rdi), %rax
  33199. movq 304(%r8), %rdx
  33200. movq %rax, 296(%r8)
  33201. sbbq 304(%rdi), %rdx
  33202. movq 312(%r8), %rax
  33203. movq %rdx, 304(%r8)
  33204. sbbq 312(%rdi), %rax
  33205. movq 320(%r8), %rdx
  33206. movq %rax, 312(%r8)
  33207. sbbq 320(%rdi), %rdx
  33208. movq 328(%r8), %rax
  33209. movq %rdx, 320(%r8)
  33210. sbbq 328(%rdi), %rax
  33211. movq 336(%r8), %rdx
  33212. movq %rax, 328(%r8)
  33213. sbbq 336(%rdi), %rdx
  33214. movq 344(%r8), %rax
  33215. movq %rdx, 336(%r8)
  33216. sbbq 344(%rdi), %rax
  33217. movq 352(%r8), %rdx
  33218. movq %rax, 344(%r8)
  33219. sbbq 352(%rdi), %rdx
  33220. movq 360(%r8), %rax
  33221. movq %rdx, 352(%r8)
  33222. sbbq 360(%rdi), %rax
  33223. movq 368(%r8), %rdx
  33224. movq %rax, 360(%r8)
  33225. sbbq 368(%rdi), %rdx
  33226. movq 376(%r8), %rax
  33227. movq %rdx, 368(%r8)
  33228. sbbq 376(%rdi), %rax
  33229. movq 384(%r8), %rdx
  33230. movq %rax, 376(%r8)
  33231. sbbq 384(%rdi), %rdx
  33232. movq 392(%r8), %rax
  33233. movq %rdx, 384(%r8)
  33234. sbbq 392(%rdi), %rax
  33235. movq 400(%r8), %rdx
  33236. movq %rax, 392(%r8)
  33237. sbbq 400(%rdi), %rdx
  33238. movq 408(%r8), %rax
  33239. movq %rdx, 400(%r8)
  33240. sbbq 408(%rdi), %rax
  33241. movq 416(%r8), %rdx
  33242. movq %rax, 408(%r8)
  33243. sbbq 416(%rdi), %rdx
  33244. movq 424(%r8), %rax
  33245. movq %rdx, 416(%r8)
  33246. sbbq 424(%rdi), %rax
  33247. movq 432(%r8), %rdx
  33248. movq %rax, 424(%r8)
  33249. sbbq 432(%rdi), %rdx
  33250. movq 440(%r8), %rax
  33251. movq %rdx, 432(%r8)
  33252. sbbq 440(%rdi), %rax
  33253. movq 448(%r8), %rdx
  33254. movq %rax, 440(%r8)
  33255. sbbq 448(%rdi), %rdx
  33256. movq 456(%r8), %rax
  33257. movq %rdx, 448(%r8)
  33258. sbbq 456(%rdi), %rax
  33259. movq 464(%r8), %rdx
  33260. movq %rax, 456(%r8)
  33261. sbbq 464(%rdi), %rdx
  33262. movq 472(%r8), %rax
  33263. movq %rdx, 464(%r8)
  33264. sbbq 472(%rdi), %rax
  33265. movq 480(%r8), %rdx
  33266. movq %rax, 472(%r8)
  33267. sbbq 480(%rdi), %rdx
  33268. movq 488(%r8), %rax
  33269. movq %rdx, 480(%r8)
  33270. sbbq 488(%rdi), %rax
  33271. movq 496(%r8), %rdx
  33272. movq %rax, 488(%r8)
  33273. sbbq 496(%rdi), %rdx
  33274. movq 504(%r8), %rax
  33275. movq %rdx, 496(%r8)
  33276. sbbq 504(%rdi), %rax
  33277. movq %rax, 504(%r8)
  33278. sbbq $0x00, %rcx
  33279. # Add in place
  33280. movq 256(%rdi), %rdx
  33281. addq (%r8), %rdx
  33282. movq 264(%rdi), %rax
  33283. movq %rdx, 256(%rdi)
  33284. adcq 8(%r8), %rax
  33285. movq 272(%rdi), %rdx
  33286. movq %rax, 264(%rdi)
  33287. adcq 16(%r8), %rdx
  33288. movq 280(%rdi), %rax
  33289. movq %rdx, 272(%rdi)
  33290. adcq 24(%r8), %rax
  33291. movq 288(%rdi), %rdx
  33292. movq %rax, 280(%rdi)
  33293. adcq 32(%r8), %rdx
  33294. movq 296(%rdi), %rax
  33295. movq %rdx, 288(%rdi)
  33296. adcq 40(%r8), %rax
  33297. movq 304(%rdi), %rdx
  33298. movq %rax, 296(%rdi)
  33299. adcq 48(%r8), %rdx
  33300. movq 312(%rdi), %rax
  33301. movq %rdx, 304(%rdi)
  33302. adcq 56(%r8), %rax
  33303. movq 320(%rdi), %rdx
  33304. movq %rax, 312(%rdi)
  33305. adcq 64(%r8), %rdx
  33306. movq 328(%rdi), %rax
  33307. movq %rdx, 320(%rdi)
  33308. adcq 72(%r8), %rax
  33309. movq 336(%rdi), %rdx
  33310. movq %rax, 328(%rdi)
  33311. adcq 80(%r8), %rdx
  33312. movq 344(%rdi), %rax
  33313. movq %rdx, 336(%rdi)
  33314. adcq 88(%r8), %rax
  33315. movq 352(%rdi), %rdx
  33316. movq %rax, 344(%rdi)
  33317. adcq 96(%r8), %rdx
  33318. movq 360(%rdi), %rax
  33319. movq %rdx, 352(%rdi)
  33320. adcq 104(%r8), %rax
  33321. movq 368(%rdi), %rdx
  33322. movq %rax, 360(%rdi)
  33323. adcq 112(%r8), %rdx
  33324. movq 376(%rdi), %rax
  33325. movq %rdx, 368(%rdi)
  33326. adcq 120(%r8), %rax
  33327. movq 384(%rdi), %rdx
  33328. movq %rax, 376(%rdi)
  33329. adcq 128(%r8), %rdx
  33330. movq 392(%rdi), %rax
  33331. movq %rdx, 384(%rdi)
  33332. adcq 136(%r8), %rax
  33333. movq 400(%rdi), %rdx
  33334. movq %rax, 392(%rdi)
  33335. adcq 144(%r8), %rdx
  33336. movq 408(%rdi), %rax
  33337. movq %rdx, 400(%rdi)
  33338. adcq 152(%r8), %rax
  33339. movq 416(%rdi), %rdx
  33340. movq %rax, 408(%rdi)
  33341. adcq 160(%r8), %rdx
  33342. movq 424(%rdi), %rax
  33343. movq %rdx, 416(%rdi)
  33344. adcq 168(%r8), %rax
  33345. movq 432(%rdi), %rdx
  33346. movq %rax, 424(%rdi)
  33347. adcq 176(%r8), %rdx
  33348. movq 440(%rdi), %rax
  33349. movq %rdx, 432(%rdi)
  33350. adcq 184(%r8), %rax
  33351. movq 448(%rdi), %rdx
  33352. movq %rax, 440(%rdi)
  33353. adcq 192(%r8), %rdx
  33354. movq 456(%rdi), %rax
  33355. movq %rdx, 448(%rdi)
  33356. adcq 200(%r8), %rax
  33357. movq 464(%rdi), %rdx
  33358. movq %rax, 456(%rdi)
  33359. adcq 208(%r8), %rdx
  33360. movq 472(%rdi), %rax
  33361. movq %rdx, 464(%rdi)
  33362. adcq 216(%r8), %rax
  33363. movq 480(%rdi), %rdx
  33364. movq %rax, 472(%rdi)
  33365. adcq 224(%r8), %rdx
  33366. movq 488(%rdi), %rax
  33367. movq %rdx, 480(%rdi)
  33368. adcq 232(%r8), %rax
  33369. movq 496(%rdi), %rdx
  33370. movq %rax, 488(%rdi)
  33371. adcq 240(%r8), %rdx
  33372. movq 504(%rdi), %rax
  33373. movq %rdx, 496(%rdi)
  33374. adcq 248(%r8), %rax
  33375. movq 512(%rdi), %rdx
  33376. movq %rax, 504(%rdi)
  33377. adcq 256(%r8), %rdx
  33378. movq 520(%rdi), %rax
  33379. movq %rdx, 512(%rdi)
  33380. adcq 264(%r8), %rax
  33381. movq 528(%rdi), %rdx
  33382. movq %rax, 520(%rdi)
  33383. adcq 272(%r8), %rdx
  33384. movq 536(%rdi), %rax
  33385. movq %rdx, 528(%rdi)
  33386. adcq 280(%r8), %rax
  33387. movq 544(%rdi), %rdx
  33388. movq %rax, 536(%rdi)
  33389. adcq 288(%r8), %rdx
  33390. movq 552(%rdi), %rax
  33391. movq %rdx, 544(%rdi)
  33392. adcq 296(%r8), %rax
  33393. movq 560(%rdi), %rdx
  33394. movq %rax, 552(%rdi)
  33395. adcq 304(%r8), %rdx
  33396. movq 568(%rdi), %rax
  33397. movq %rdx, 560(%rdi)
  33398. adcq 312(%r8), %rax
  33399. movq 576(%rdi), %rdx
  33400. movq %rax, 568(%rdi)
  33401. adcq 320(%r8), %rdx
  33402. movq 584(%rdi), %rax
  33403. movq %rdx, 576(%rdi)
  33404. adcq 328(%r8), %rax
  33405. movq 592(%rdi), %rdx
  33406. movq %rax, 584(%rdi)
  33407. adcq 336(%r8), %rdx
  33408. movq 600(%rdi), %rax
  33409. movq %rdx, 592(%rdi)
  33410. adcq 344(%r8), %rax
  33411. movq 608(%rdi), %rdx
  33412. movq %rax, 600(%rdi)
  33413. adcq 352(%r8), %rdx
  33414. movq 616(%rdi), %rax
  33415. movq %rdx, 608(%rdi)
  33416. adcq 360(%r8), %rax
  33417. movq 624(%rdi), %rdx
  33418. movq %rax, 616(%rdi)
  33419. adcq 368(%r8), %rdx
  33420. movq 632(%rdi), %rax
  33421. movq %rdx, 624(%rdi)
  33422. adcq 376(%r8), %rax
  33423. movq 640(%rdi), %rdx
  33424. movq %rax, 632(%rdi)
  33425. adcq 384(%r8), %rdx
  33426. movq 648(%rdi), %rax
  33427. movq %rdx, 640(%rdi)
  33428. adcq 392(%r8), %rax
  33429. movq 656(%rdi), %rdx
  33430. movq %rax, 648(%rdi)
  33431. adcq 400(%r8), %rdx
  33432. movq 664(%rdi), %rax
  33433. movq %rdx, 656(%rdi)
  33434. adcq 408(%r8), %rax
  33435. movq 672(%rdi), %rdx
  33436. movq %rax, 664(%rdi)
  33437. adcq 416(%r8), %rdx
  33438. movq 680(%rdi), %rax
  33439. movq %rdx, 672(%rdi)
  33440. adcq 424(%r8), %rax
  33441. movq 688(%rdi), %rdx
  33442. movq %rax, 680(%rdi)
  33443. adcq 432(%r8), %rdx
  33444. movq 696(%rdi), %rax
  33445. movq %rdx, 688(%rdi)
  33446. adcq 440(%r8), %rax
  33447. movq 704(%rdi), %rdx
  33448. movq %rax, 696(%rdi)
  33449. adcq 448(%r8), %rdx
  33450. movq 712(%rdi), %rax
  33451. movq %rdx, 704(%rdi)
  33452. adcq 456(%r8), %rax
  33453. movq 720(%rdi), %rdx
  33454. movq %rax, 712(%rdi)
  33455. adcq 464(%r8), %rdx
  33456. movq 728(%rdi), %rax
  33457. movq %rdx, 720(%rdi)
  33458. adcq 472(%r8), %rax
  33459. movq 736(%rdi), %rdx
  33460. movq %rax, 728(%rdi)
  33461. adcq 480(%r8), %rdx
  33462. movq 744(%rdi), %rax
  33463. movq %rdx, 736(%rdi)
  33464. adcq 488(%r8), %rax
  33465. movq 752(%rdi), %rdx
  33466. movq %rax, 744(%rdi)
  33467. adcq 496(%r8), %rdx
  33468. movq 760(%rdi), %rax
  33469. movq %rdx, 752(%rdi)
  33470. adcq 504(%r8), %rax
  33471. movq %rax, 760(%rdi)
  33472. adcq $0x00, %rcx
  33473. movq %rcx, 768(%rdi)
  33474. # Add in place
  33475. movq 512(%rdi), %rdx
  33476. xorq %rcx, %rcx
  33477. addq (%rsi), %rdx
  33478. movq 520(%rdi), %rax
  33479. movq %rdx, 512(%rdi)
  33480. adcq 8(%rsi), %rax
  33481. movq 528(%rdi), %rdx
  33482. movq %rax, 520(%rdi)
  33483. adcq 16(%rsi), %rdx
  33484. movq 536(%rdi), %rax
  33485. movq %rdx, 528(%rdi)
  33486. adcq 24(%rsi), %rax
  33487. movq 544(%rdi), %rdx
  33488. movq %rax, 536(%rdi)
  33489. adcq 32(%rsi), %rdx
  33490. movq 552(%rdi), %rax
  33491. movq %rdx, 544(%rdi)
  33492. adcq 40(%rsi), %rax
  33493. movq 560(%rdi), %rdx
  33494. movq %rax, 552(%rdi)
  33495. adcq 48(%rsi), %rdx
  33496. movq 568(%rdi), %rax
  33497. movq %rdx, 560(%rdi)
  33498. adcq 56(%rsi), %rax
  33499. movq 576(%rdi), %rdx
  33500. movq %rax, 568(%rdi)
  33501. adcq 64(%rsi), %rdx
  33502. movq 584(%rdi), %rax
  33503. movq %rdx, 576(%rdi)
  33504. adcq 72(%rsi), %rax
  33505. movq 592(%rdi), %rdx
  33506. movq %rax, 584(%rdi)
  33507. adcq 80(%rsi), %rdx
  33508. movq 600(%rdi), %rax
  33509. movq %rdx, 592(%rdi)
  33510. adcq 88(%rsi), %rax
  33511. movq 608(%rdi), %rdx
  33512. movq %rax, 600(%rdi)
  33513. adcq 96(%rsi), %rdx
  33514. movq 616(%rdi), %rax
  33515. movq %rdx, 608(%rdi)
  33516. adcq 104(%rsi), %rax
  33517. movq 624(%rdi), %rdx
  33518. movq %rax, 616(%rdi)
  33519. adcq 112(%rsi), %rdx
  33520. movq 632(%rdi), %rax
  33521. movq %rdx, 624(%rdi)
  33522. adcq 120(%rsi), %rax
  33523. movq 640(%rdi), %rdx
  33524. movq %rax, 632(%rdi)
  33525. adcq 128(%rsi), %rdx
  33526. movq 648(%rdi), %rax
  33527. movq %rdx, 640(%rdi)
  33528. adcq 136(%rsi), %rax
  33529. movq 656(%rdi), %rdx
  33530. movq %rax, 648(%rdi)
  33531. adcq 144(%rsi), %rdx
  33532. movq 664(%rdi), %rax
  33533. movq %rdx, 656(%rdi)
  33534. adcq 152(%rsi), %rax
  33535. movq 672(%rdi), %rdx
  33536. movq %rax, 664(%rdi)
  33537. adcq 160(%rsi), %rdx
  33538. movq 680(%rdi), %rax
  33539. movq %rdx, 672(%rdi)
  33540. adcq 168(%rsi), %rax
  33541. movq 688(%rdi), %rdx
  33542. movq %rax, 680(%rdi)
  33543. adcq 176(%rsi), %rdx
  33544. movq 696(%rdi), %rax
  33545. movq %rdx, 688(%rdi)
  33546. adcq 184(%rsi), %rax
  33547. movq 704(%rdi), %rdx
  33548. movq %rax, 696(%rdi)
  33549. adcq 192(%rsi), %rdx
  33550. movq 712(%rdi), %rax
  33551. movq %rdx, 704(%rdi)
  33552. adcq 200(%rsi), %rax
  33553. movq 720(%rdi), %rdx
  33554. movq %rax, 712(%rdi)
  33555. adcq 208(%rsi), %rdx
  33556. movq 728(%rdi), %rax
  33557. movq %rdx, 720(%rdi)
  33558. adcq 216(%rsi), %rax
  33559. movq 736(%rdi), %rdx
  33560. movq %rax, 728(%rdi)
  33561. adcq 224(%rsi), %rdx
  33562. movq 744(%rdi), %rax
  33563. movq %rdx, 736(%rdi)
  33564. adcq 232(%rsi), %rax
  33565. movq 752(%rdi), %rdx
  33566. movq %rax, 744(%rdi)
  33567. adcq 240(%rsi), %rdx
  33568. movq 760(%rdi), %rax
  33569. movq %rdx, 752(%rdi)
  33570. adcq 248(%rsi), %rax
  33571. movq 768(%rdi), %rdx
  33572. movq %rax, 760(%rdi)
  33573. adcq 256(%rsi), %rdx
  33574. movq %rdx, 768(%rdi)
  33575. adcq $0x00, %rcx
  33576. # Add to zero
  33577. movq 264(%rsi), %rdx
  33578. adcq $0x00, %rdx
  33579. movq 272(%rsi), %rax
  33580. movq %rdx, 776(%rdi)
  33581. adcq $0x00, %rax
  33582. movq 280(%rsi), %rdx
  33583. movq %rax, 784(%rdi)
  33584. adcq $0x00, %rdx
  33585. movq 288(%rsi), %rax
  33586. movq %rdx, 792(%rdi)
  33587. adcq $0x00, %rax
  33588. movq 296(%rsi), %rdx
  33589. movq %rax, 800(%rdi)
  33590. adcq $0x00, %rdx
  33591. movq 304(%rsi), %rax
  33592. movq %rdx, 808(%rdi)
  33593. adcq $0x00, %rax
  33594. movq 312(%rsi), %rdx
  33595. movq %rax, 816(%rdi)
  33596. adcq $0x00, %rdx
  33597. movq 320(%rsi), %rax
  33598. movq %rdx, 824(%rdi)
  33599. adcq $0x00, %rax
  33600. movq 328(%rsi), %rdx
  33601. movq %rax, 832(%rdi)
  33602. adcq $0x00, %rdx
  33603. movq 336(%rsi), %rax
  33604. movq %rdx, 840(%rdi)
  33605. adcq $0x00, %rax
  33606. movq 344(%rsi), %rdx
  33607. movq %rax, 848(%rdi)
  33608. adcq $0x00, %rdx
  33609. movq 352(%rsi), %rax
  33610. movq %rdx, 856(%rdi)
  33611. adcq $0x00, %rax
  33612. movq 360(%rsi), %rdx
  33613. movq %rax, 864(%rdi)
  33614. adcq $0x00, %rdx
  33615. movq 368(%rsi), %rax
  33616. movq %rdx, 872(%rdi)
  33617. adcq $0x00, %rax
  33618. movq 376(%rsi), %rdx
  33619. movq %rax, 880(%rdi)
  33620. adcq $0x00, %rdx
  33621. movq 384(%rsi), %rax
  33622. movq %rdx, 888(%rdi)
  33623. adcq $0x00, %rax
  33624. movq 392(%rsi), %rdx
  33625. movq %rax, 896(%rdi)
  33626. adcq $0x00, %rdx
  33627. movq 400(%rsi), %rax
  33628. movq %rdx, 904(%rdi)
  33629. adcq $0x00, %rax
  33630. movq 408(%rsi), %rdx
  33631. movq %rax, 912(%rdi)
  33632. adcq $0x00, %rdx
  33633. movq 416(%rsi), %rax
  33634. movq %rdx, 920(%rdi)
  33635. adcq $0x00, %rax
  33636. movq 424(%rsi), %rdx
  33637. movq %rax, 928(%rdi)
  33638. adcq $0x00, %rdx
  33639. movq 432(%rsi), %rax
  33640. movq %rdx, 936(%rdi)
  33641. adcq $0x00, %rax
  33642. movq 440(%rsi), %rdx
  33643. movq %rax, 944(%rdi)
  33644. adcq $0x00, %rdx
  33645. movq 448(%rsi), %rax
  33646. movq %rdx, 952(%rdi)
  33647. adcq $0x00, %rax
  33648. movq 456(%rsi), %rdx
  33649. movq %rax, 960(%rdi)
  33650. adcq $0x00, %rdx
  33651. movq 464(%rsi), %rax
  33652. movq %rdx, 968(%rdi)
  33653. adcq $0x00, %rax
  33654. movq 472(%rsi), %rdx
  33655. movq %rax, 976(%rdi)
  33656. adcq $0x00, %rdx
  33657. movq 480(%rsi), %rax
  33658. movq %rdx, 984(%rdi)
  33659. adcq $0x00, %rax
  33660. movq 488(%rsi), %rdx
  33661. movq %rax, 992(%rdi)
  33662. adcq $0x00, %rdx
  33663. movq 496(%rsi), %rax
  33664. movq %rdx, 1000(%rdi)
  33665. adcq $0x00, %rax
  33666. movq 504(%rsi), %rdx
  33667. movq %rax, 1008(%rdi)
  33668. adcq $0x00, %rdx
  33669. movq %rdx, 1016(%rdi)
  33670. addq $0x518, %rsp
  33671. repz retq
  33672. #ifndef __APPLE__
  33673. .size sp_4096_sqr_avx2_64,.-sp_4096_sqr_avx2_64
  33674. #endif /* __APPLE__ */
  /* Multiply a 64-word number by a single 64-bit digit. (r = a * b)
   *
   * Arguments, as read by the code below:
   *   %rdi  r  Destination; 65 words are stored, at byte offsets 0..512.
   *   %rsi  a  Source; 64 words are loaded, from byte offsets 0..504.
   *   %rdx  b  The 64-bit digit every word of a is multiplied by.
   *
   * Scratch registers written: %rax, %rcx, %rdx, %r8, %r9, %r10 and the
   * flags.  The stack is not touched.
   *
   * Each a[i] is loaded before r[i] is stored and no r[] word is read back,
   * so the sequence still works when r and a are the same buffer.
   */

  /* One interior step of the multiply: r[off / 8] and the running carry.
   *
   *   off  Byte offset of the word being processed (same in a and r).
   *   lo   Register holding the low carry word; receives the low half of
   *        the product and is then stored as the finished result word.
   *   hi   Register holding the high carry word; receives the high half.
   *   nxt  Register cleared here and used to catch the carry out of hi.
   *
   * The caller rotates (lo, hi, nxt) one position per step so that the hi
   * of one step becomes the lo of the following one.  b is expected in
   * %rcx because mulq overwrites both %rax and %rdx.
   */
  .macro sp_4096_mul_d_64_step off, lo, hi, nxt
          movq    %rcx, %rax
          xorq    \nxt, \nxt
          mulq    \off(%rsi)
          addq    %rax, \lo
          movq    \lo, \off(%rdi)
          adcq    %rdx, \hi
          adcq    $0x00, \nxt
  .endm

  #ifndef __APPLE__
  .text
  .globl sp_4096_mul_d_64
  .type sp_4096_mul_d_64,@function
  .align 16
  sp_4096_mul_d_64:
  #else
  .section __TEXT,__text
  .globl _sp_4096_mul_d_64
  .p2align 4
  _sp_4096_mul_d_64:
  #endif /* __APPLE__ */
          /* Park b in %rcx; %rdx is about to be overwritten by mulq. */
          movq    %rdx, %rcx

          /* Word 0: nothing to accumulate yet, so the product is just
           * copied into the carry registers and the low half stored. */
          movq    %rcx, %rax
          xorq    %r10, %r10
          mulq    (%rsi)
          movq    %rax, %r8
          movq    %rdx, %r9
          movq    %r8, (%rdi)

          /* Words 1..62: identical steps, carry registers rotating with a
           * period of three. */
          sp_4096_mul_d_64_step   8, %r9, %r10, %r8
          sp_4096_mul_d_64_step  16, %r10, %r8, %r9
          sp_4096_mul_d_64_step  24, %r8, %r9, %r10
          sp_4096_mul_d_64_step  32, %r9, %r10, %r8
          sp_4096_mul_d_64_step  40, %r10, %r8, %r9
          sp_4096_mul_d_64_step  48, %r8, %r9, %r10
          sp_4096_mul_d_64_step  56, %r9, %r10, %r8
          sp_4096_mul_d_64_step  64, %r10, %r8, %r9
          sp_4096_mul_d_64_step  72, %r8, %r9, %r10
          sp_4096_mul_d_64_step  80, %r9, %r10, %r8
          sp_4096_mul_d_64_step  88, %r10, %r8, %r9
          sp_4096_mul_d_64_step  96, %r8, %r9, %r10
          sp_4096_mul_d_64_step 104, %r9, %r10, %r8
          sp_4096_mul_d_64_step 112, %r10, %r8, %r9
          sp_4096_mul_d_64_step 120, %r8, %r9, %r10
          sp_4096_mul_d_64_step 128, %r9, %r10, %r8
          sp_4096_mul_d_64_step 136, %r10, %r8, %r9
          sp_4096_mul_d_64_step 144, %r8, %r9, %r10
          sp_4096_mul_d_64_step 152, %r9, %r10, %r8
          sp_4096_mul_d_64_step 160, %r10, %r8, %r9
          sp_4096_mul_d_64_step 168, %r8, %r9, %r10
          sp_4096_mul_d_64_step 176, %r9, %r10, %r8
          sp_4096_mul_d_64_step 184, %r10, %r8, %r9
          sp_4096_mul_d_64_step 192, %r8, %r9, %r10
          sp_4096_mul_d_64_step 200, %r9, %r10, %r8
          sp_4096_mul_d_64_step 208, %r10, %r8, %r9
          sp_4096_mul_d_64_step 216, %r8, %r9, %r10
          sp_4096_mul_d_64_step 224, %r9, %r10, %r8
          sp_4096_mul_d_64_step 232, %r10, %r8, %r9
          sp_4096_mul_d_64_step 240, %r8, %r9, %r10
          sp_4096_mul_d_64_step 248, %r9, %r10, %r8
          sp_4096_mul_d_64_step 256, %r10, %r8, %r9
          sp_4096_mul_d_64_step 264, %r8, %r9, %r10
          sp_4096_mul_d_64_step 272, %r9, %r10, %r8
          sp_4096_mul_d_64_step 280, %r10, %r8, %r9
          sp_4096_mul_d_64_step 288, %r8, %r9, %r10
          sp_4096_mul_d_64_step 296, %r9, %r10, %r8
          sp_4096_mul_d_64_step 304, %r10, %r8, %r9
          sp_4096_mul_d_64_step 312, %r8, %r9, %r10
          sp_4096_mul_d_64_step 320, %r9, %r10, %r8
          sp_4096_mul_d_64_step 328, %r10, %r8, %r9
          sp_4096_mul_d_64_step 336, %r8, %r9, %r10
          sp_4096_mul_d_64_step 344, %r9, %r10, %r8
          sp_4096_mul_d_64_step 352, %r10, %r8, %r9
          sp_4096_mul_d_64_step 360, %r8, %r9, %r10
          sp_4096_mul_d_64_step 368, %r9, %r10, %r8
          sp_4096_mul_d_64_step 376, %r10, %r8, %r9
          sp_4096_mul_d_64_step 384, %r8, %r9, %r10
          sp_4096_mul_d_64_step 392, %r9, %r10, %r8
          sp_4096_mul_d_64_step 400, %r10, %r8, %r9
          sp_4096_mul_d_64_step 408, %r8, %r9, %r10
          sp_4096_mul_d_64_step 416, %r9, %r10, %r8
          sp_4096_mul_d_64_step 424, %r10, %r8, %r9
          sp_4096_mul_d_64_step 432, %r8, %r9, %r10
          sp_4096_mul_d_64_step 440, %r9, %r10, %r8
          sp_4096_mul_d_64_step 448, %r10, %r8, %r9
          sp_4096_mul_d_64_step 456, %r8, %r9, %r10
          sp_4096_mul_d_64_step 464, %r9, %r10, %r8
          sp_4096_mul_d_64_step 472, %r10, %r8, %r9
          sp_4096_mul_d_64_step 480, %r8, %r9, %r10
          sp_4096_mul_d_64_step 488, %r9, %r10, %r8
          sp_4096_mul_d_64_step 496, %r10, %r8, %r9

          /* Word 63: the last product.  No third carry register is cleared
           * or updated here; %r8 becomes r[63] and %r9 becomes the extra
           * top word r[64]. */
          movq    %rcx, %rax
          mulq    504(%rsi)
          addq    %rax, %r8
          adcq    %rdx, %r9
          movq    %r8, 504(%rdi)
          movq    %r9, 512(%rdi)
          /* Same return encoding as the other routines in this file. */
          repz retq
  #ifndef __APPLE__
  .size sp_4096_mul_d_64,.-sp_4096_mul_d_64
  #endif /* __APPLE__ */
  /* Branch-free conditional subtraction of 64-word (4096-bit) numbers:
   *   r = a - (b & m)
   *
   * Every word of b is ANDed with m before it is subtracted, so an all-ones
   * mask (m = -1) subtracts b and a zero mask subtracts nothing, leaving r
   * equal to a. The same instructions execute for either mask value.
   *
   * r  (%rdi) Result, 64 words.
   * a  (%rsi) Number to subtract from, 64 words.
   * b  (%rdx) Number to subtract, 64 words.
   * m  (%rcx) Mask applied to every word of b.
   *
   * On return %rax is 0 when the subtraction did not borrow out of the top
   * word and -1 (all ones) when it did.
   * Overwrites %r8, %r9, %rdx and the flags, and uses 512 bytes of stack
   * below the return address as scratch for the masked copy of b.
   */
  #ifndef __APPLE__
  .text
  .globl sp_4096_cond_sub_64
  .type sp_4096_cond_sub_64,@function
  .align 16
  sp_4096_cond_sub_64:
  #else
  .section __TEXT,__text
  .globl _sp_4096_cond_sub_64
  .p2align 4
  _sp_4096_cond_sub_64:
  #endif /* __APPLE__ */
          subq    $0x200, %rsp
          movq    $0x00, %rax
          # Pass one: scratch[k] = b[k] & m, two words per step.
          .irp    ofs, 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224,240,256,272,288,304,320,336,352,368,384,400,416,432,448,464,480,496
          movq    \ofs(%rdx), %r8
          movq    \ofs+8(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, \ofs(%rsp)
          movq    %r9, \ofs+8(%rsp)
          .endr
          # Pass two: r = a - scratch as one unbroken borrow chain.
          # %rdx is reused as a temporary from here on (b is fully copied).
          # Only movq sits between the sub/sbb instructions, so the borrow
          # flag survives from word to word. Each result word is stored one
          # step late, after the next word has been subtracted.
          movq    (%rsi), %r8
          movq    (%rsp), %rdx
          subq    %rdx, %r8
          movq    8(%rsi), %r9
          movq    8(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, (%rdi)
          .irp    ofs, 16,32,48,64,80,96,112,128,144,160,176,192,208,224,240,256,272,288,304,320,336,352,368,384,400,416,432,448,464,480,496
          movq    \ofs(%rsi), %r8
          movq    \ofs(%rsp), %rdx
          sbbq    %rdx, %r8
          movq    %r9, \ofs-8(%rdi)
          movq    \ofs+8(%rsi), %r9
          movq    \ofs+8(%rsp), %rdx
          sbbq    %rdx, %r9
          movq    %r8, \ofs(%rdi)
          .endr
          movq    %r9, 504(%rdi)
          # Turn the final borrow into the return value: 0 - 0 - CF.
          sbbq    $0x00, %rax
          addq    $0x200, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_4096_cond_sub_64,.-sp_4096_cond_sub_64
  #endif /* __APPLE__ */
  /* Reduce the number back to 4096 bits using Montgomery reduction.
   *
   * a   (%rdi) Number to reduce in place. Words a[0..127] are read and
   *            written; the result is left in a[0..63].
   * m   (%rsi) The 64-word modulus.
   * mp  (%rdx) The digit representing the negative inverse of m mod 2^n.
   *
   * Runs 64 rounds. Round i computes mu = a[i] * mp (low 64 bits) and adds
   * mu * m into a[i..i+63], folding the carry into a[i+64]. The carry out of
   * a[i+64] is kept in %r15 and folded into the next round. After the last
   * round sp_4096_cond_sub_64 is called to compute
   *   a[0..63] = a[64..127] - (m & mask),  mask = -(final carry).
   * %rax is left as returned by that call.
   *
   * Register roles inside the loop:
   *   %rcx = mp, %r8 = rounds remaining, %r11 = mu,
   *   %r13 / %r14 = the two lowest live words of a (not stored to memory
   *   until the loop ends), %r9 / %r10 = alternating carry words,
   *   %r12 = scratch for a[i+k], %r15 = carry between rounds.
   */
  #ifndef __APPLE__
  .text
  .globl sp_4096_mont_reduce_64
  .type sp_4096_mont_reduce_64,@function
  .align 16
  sp_4096_mont_reduce_64:
  #else
  .section __TEXT,__text
  .globl _sp_4096_mont_reduce_64
  .p2align 4
  _sp_4096_mont_reduce_64:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          # mulq writes %rdx, so mp is moved to %rcx.
          movq    %rdx, %rcx
          xorq    %r15, %r15
          # i = 64
          movq    $0x40, %r8
          movq    (%rdi), %r13
          movq    8(%rdi), %r14
  L_mont_loop_64:
          # mu = a[i] * mp
          movq    %r13, %r11
          imulq   %rcx, %r11
          # a[i+0] += m[0] * mu  (the low word of the sum is not stored,
          # only the carry in %r10 is used)
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    (%rsi)
          addq    %rax, %r13
          adcq    %rdx, %r10
          # a[i+1] += m[1] * mu  (becomes the cached low word of next round)
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    8(%rsi)
          movq    %r14, %r13
          addq    %rax, %r13
          adcq    %rdx, %r9
          addq    %r10, %r13
          adcq    $0x00, %r9
          # a[i+2] += m[2] * mu  (becomes the cached second word)
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    16(%rsi)
          movq    16(%rdi), %r14
          addq    %rax, %r14
          adcq    %rdx, %r10
          addq    %r9, %r14
          adcq    $0x00, %r10
          # a[i+k] += m[k] * mu for k = 3..62, two words per step:
          # the odd word takes its carry from %r10 and leaves one in %r9,
          # the even word takes %r9 and leaves %r10.
          .irp    ofs, 24,40,56,72,88,104,120,136,152,168,184,200,216,232,248,264,280,296,312,328,344,360,376,392,408,424,440,456,472,488
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    \ofs(%rsi)
          movq    \ofs(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, \ofs(%rdi)
          adcq    $0x00, %r9
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    \ofs+8(%rsi)
          movq    \ofs+8(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, \ofs+8(%rdi)
          adcq    $0x00, %r10
          .endr
          # a[i+63] += m[63] * mu, then fold the high half and the carry
          # saved from the previous round into a[i+64].
          movq    %r11, %rax
          mulq    504(%rsi)
          movq    504(%rdi), %r12
          addq    %rax, %r10
          adcq    %r15, %rdx
          # movq (not xorq) so the carry from the adcq above is preserved.
          movq    $0x00, %r15
          adcq    $0x00, %r15
          addq    %r10, %r12
          movq    %r12, 504(%rdi)
          adcq    %rdx, 512(%rdi)
          adcq    $0x00, %r15
          # i -= 1
          addq    $8, %rdi
          decq    %r8
          jnz     L_mont_loop_64
          # %rdi now points at a[64]; write back the two cached words.
          movq    %r13, (%rdi)
          movq    %r14, 8(%rdi)
          # mask = -carry: 0 or all ones.
          negq    %r15
          # Arguments for sp_4096_cond_sub_64(r, a, b, m):
          #   m = mask, b = modulus, a = upper half, r = start of buffer.
          movq    %r15, %rcx
          movq    %rsi, %rdx
          movq    %rdi, %rsi
          subq    $0x200, %rdi
          # Four pushes after entry leave %rsp at 8 mod 16; pad by 8 so the
          # stack is 16-byte aligned at the call.
          subq    $8, %rsp
  #ifndef __APPLE__
          callq   sp_4096_cond_sub_64@plt
  #else
          callq   _sp_4096_cond_sub_64
  #endif /* __APPLE__ */
          addq    $8, %rsp
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size sp_4096_mont_reduce_64,.-sp_4096_mont_reduce_64
  #endif /* __APPLE__ */
  35377. /* Conditionally subtract b from a using the mask m.
  35378. * m is -1 to subtract and 0 when not subtracting.
  35379. *
  35380. * r A single precision number representing condition subtract result.
  35381. * a A single precision number to subtract from.
  35382. * b A single precision number to subtract.
  35383. * m Mask value to apply.
  *
  * As used below: r is addressed through %rdi, a through %rsi, b through
  * %rdx, and m is held in %rcx. All three numbers are accessed as 64 limbs
  * of 64 bits (byte offsets 0..504).
  * Result in %rax: 0 when the subtraction did not borrow out of the top
  * limb, all ones (-1) when it did.
  * Scratch registers: %r8, %r9, %r10 (their roles rotate between limbs).
  *
  * Each limb of b is filtered with pextq using m as the bit-select mask:
  * an all-ones mask passes the limb through unchanged and a zero mask
  * yields 0, so the function is branch-free and executes the same
  * instructions for both values of m. Any other mask value would compact
  * bits instead of masking them, so m must be exactly 0 or -1.
  * pextq is a BMI2 instruction.
  * NOTE(review): no HAVE_INTEL_AVX2 guard is visible around this function
  * in this part of the file - confirm how it is gated.
  35384. */
  35385. #ifndef __APPLE__
  35386. .text
  35387. .globl sp_4096_cond_sub_avx2_64
  35388. .type sp_4096_cond_sub_avx2_64,@function
  35389. .align 16
  35390. sp_4096_cond_sub_avx2_64:
  35391. #else
  35392. .section __TEXT,__text
  35393. .globl _sp_4096_cond_sub_avx2_64
  35394. .p2align 4
  35395. _sp_4096_cond_sub_avx2_64:
  35396. #endif /* __APPLE__ */
  /* rax = 0 now; the final sbbq turns it into the borrow result. */
  35397. movq $0x00, %rax
  /* Limb 0: r10 = b[0] filtered by m, r8 = a[0]; subq starts the borrow chain. */
  35398. movq (%rdx), %r10
  35399. movq (%rsi), %r8
  35400. pextq %rcx, %r10, %r10
  35401. subq %r10, %r8
  /* Limbs 1..63: load and filter b[i], load a[i], store the previous
   * result limb, then sbbq continues the borrow chain. The movq and pextq
   * instructions between two subtractions do not modify the carry flag,
   * so the borrow of limb i-1 is still live at the sbbq for limb i.
   * r8, r9 and r10 take turns as minuend, subtrahend and pending store. */
  35402. movq 8(%rdx), %r10
  35403. movq 8(%rsi), %r9
  35404. pextq %rcx, %r10, %r10
  35405. movq %r8, (%rdi)
  35406. sbbq %r10, %r9
  35407. movq 16(%rdx), %r8
  35408. movq 16(%rsi), %r10
  35409. pextq %rcx, %r8, %r8
  35410. movq %r9, 8(%rdi)
  35411. sbbq %r8, %r10
  35412. movq 24(%rdx), %r9
  35413. movq 24(%rsi), %r8
  35414. pextq %rcx, %r9, %r9
  35415. movq %r10, 16(%rdi)
  35416. sbbq %r9, %r8
  35417. movq 32(%rdx), %r10
  35418. movq 32(%rsi), %r9
  35419. pextq %rcx, %r10, %r10
  35420. movq %r8, 24(%rdi)
  35421. sbbq %r10, %r9
  35422. movq 40(%rdx), %r8
  35423. movq 40(%rsi), %r10
  35424. pextq %rcx, %r8, %r8
  35425. movq %r9, 32(%rdi)
  35426. sbbq %r8, %r10
  35427. movq 48(%rdx), %r9
  35428. movq 48(%rsi), %r8
  35429. pextq %rcx, %r9, %r9
  35430. movq %r10, 40(%rdi)
  35431. sbbq %r9, %r8
  35432. movq 56(%rdx), %r10
  35433. movq 56(%rsi), %r9
  35434. pextq %rcx, %r10, %r10
  35435. movq %r8, 48(%rdi)
  35436. sbbq %r10, %r9
  35437. movq 64(%rdx), %r8
  35438. movq 64(%rsi), %r10
  35439. pextq %rcx, %r8, %r8
  35440. movq %r9, 56(%rdi)
  35441. sbbq %r8, %r10
  35442. movq 72(%rdx), %r9
  35443. movq 72(%rsi), %r8
  35444. pextq %rcx, %r9, %r9
  35445. movq %r10, 64(%rdi)
  35446. sbbq %r9, %r8
  35447. movq 80(%rdx), %r10
  35448. movq 80(%rsi), %r9
  35449. pextq %rcx, %r10, %r10
  35450. movq %r8, 72(%rdi)
  35451. sbbq %r10, %r9
  35452. movq 88(%rdx), %r8
  35453. movq 88(%rsi), %r10
  35454. pextq %rcx, %r8, %r8
  35455. movq %r9, 80(%rdi)
  35456. sbbq %r8, %r10
  35457. movq 96(%rdx), %r9
  35458. movq 96(%rsi), %r8
  35459. pextq %rcx, %r9, %r9
  35460. movq %r10, 88(%rdi)
  35461. sbbq %r9, %r8
  35462. movq 104(%rdx), %r10
  35463. movq 104(%rsi), %r9
  35464. pextq %rcx, %r10, %r10
  35465. movq %r8, 96(%rdi)
  35466. sbbq %r10, %r9
  35467. movq 112(%rdx), %r8
  35468. movq 112(%rsi), %r10
  35469. pextq %rcx, %r8, %r8
  35470. movq %r9, 104(%rdi)
  35471. sbbq %r8, %r10
  35472. movq 120(%rdx), %r9
  35473. movq 120(%rsi), %r8
  35474. pextq %rcx, %r9, %r9
  35475. movq %r10, 112(%rdi)
  35476. sbbq %r9, %r8
  35477. movq 128(%rdx), %r10
  35478. movq 128(%rsi), %r9
  35479. pextq %rcx, %r10, %r10
  35480. movq %r8, 120(%rdi)
  35481. sbbq %r10, %r9
  35482. movq 136(%rdx), %r8
  35483. movq 136(%rsi), %r10
  35484. pextq %rcx, %r8, %r8
  35485. movq %r9, 128(%rdi)
  35486. sbbq %r8, %r10
  35487. movq 144(%rdx), %r9
  35488. movq 144(%rsi), %r8
  35489. pextq %rcx, %r9, %r9
  35490. movq %r10, 136(%rdi)
  35491. sbbq %r9, %r8
  35492. movq 152(%rdx), %r10
  35493. movq 152(%rsi), %r9
  35494. pextq %rcx, %r10, %r10
  35495. movq %r8, 144(%rdi)
  35496. sbbq %r10, %r9
  35497. movq 160(%rdx), %r8
  35498. movq 160(%rsi), %r10
  35499. pextq %rcx, %r8, %r8
  35500. movq %r9, 152(%rdi)
  35501. sbbq %r8, %r10
  35502. movq 168(%rdx), %r9
  35503. movq 168(%rsi), %r8
  35504. pextq %rcx, %r9, %r9
  35505. movq %r10, 160(%rdi)
  35506. sbbq %r9, %r8
  35507. movq 176(%rdx), %r10
  35508. movq 176(%rsi), %r9
  35509. pextq %rcx, %r10, %r10
  35510. movq %r8, 168(%rdi)
  35511. sbbq %r10, %r9
  35512. movq 184(%rdx), %r8
  35513. movq 184(%rsi), %r10
  35514. pextq %rcx, %r8, %r8
  35515. movq %r9, 176(%rdi)
  35516. sbbq %r8, %r10
  35517. movq 192(%rdx), %r9
  35518. movq 192(%rsi), %r8
  35519. pextq %rcx, %r9, %r9
  35520. movq %r10, 184(%rdi)
  35521. sbbq %r9, %r8
  35522. movq 200(%rdx), %r10
  35523. movq 200(%rsi), %r9
  35524. pextq %rcx, %r10, %r10
  35525. movq %r8, 192(%rdi)
  35526. sbbq %r10, %r9
  35527. movq 208(%rdx), %r8
  35528. movq 208(%rsi), %r10
  35529. pextq %rcx, %r8, %r8
  35530. movq %r9, 200(%rdi)
  35531. sbbq %r8, %r10
  35532. movq 216(%rdx), %r9
  35533. movq 216(%rsi), %r8
  35534. pextq %rcx, %r9, %r9
  35535. movq %r10, 208(%rdi)
  35536. sbbq %r9, %r8
  35537. movq 224(%rdx), %r10
  35538. movq 224(%rsi), %r9
  35539. pextq %rcx, %r10, %r10
  35540. movq %r8, 216(%rdi)
  35541. sbbq %r10, %r9
  35542. movq 232(%rdx), %r8
  35543. movq 232(%rsi), %r10
  35544. pextq %rcx, %r8, %r8
  35545. movq %r9, 224(%rdi)
  35546. sbbq %r8, %r10
  35547. movq 240(%rdx), %r9
  35548. movq 240(%rsi), %r8
  35549. pextq %rcx, %r9, %r9
  35550. movq %r10, 232(%rdi)
  35551. sbbq %r9, %r8
  35552. movq 248(%rdx), %r10
  35553. movq 248(%rsi), %r9
  35554. pextq %rcx, %r10, %r10
  35555. movq %r8, 240(%rdi)
  35556. sbbq %r10, %r9
  35557. movq 256(%rdx), %r8
  35558. movq 256(%rsi), %r10
  35559. pextq %rcx, %r8, %r8
  35560. movq %r9, 248(%rdi)
  35561. sbbq %r8, %r10
  35562. movq 264(%rdx), %r9
  35563. movq 264(%rsi), %r8
  35564. pextq %rcx, %r9, %r9
  35565. movq %r10, 256(%rdi)
  35566. sbbq %r9, %r8
  35567. movq 272(%rdx), %r10
  35568. movq 272(%rsi), %r9
  35569. pextq %rcx, %r10, %r10
  35570. movq %r8, 264(%rdi)
  35571. sbbq %r10, %r9
  35572. movq 280(%rdx), %r8
  35573. movq 280(%rsi), %r10
  35574. pextq %rcx, %r8, %r8
  35575. movq %r9, 272(%rdi)
  35576. sbbq %r8, %r10
  35577. movq 288(%rdx), %r9
  35578. movq 288(%rsi), %r8
  35579. pextq %rcx, %r9, %r9
  35580. movq %r10, 280(%rdi)
  35581. sbbq %r9, %r8
  35582. movq 296(%rdx), %r10
  35583. movq 296(%rsi), %r9
  35584. pextq %rcx, %r10, %r10
  35585. movq %r8, 288(%rdi)
  35586. sbbq %r10, %r9
  35587. movq 304(%rdx), %r8
  35588. movq 304(%rsi), %r10
  35589. pextq %rcx, %r8, %r8
  35590. movq %r9, 296(%rdi)
  35591. sbbq %r8, %r10
  35592. movq 312(%rdx), %r9
  35593. movq 312(%rsi), %r8
  35594. pextq %rcx, %r9, %r9
  35595. movq %r10, 304(%rdi)
  35596. sbbq %r9, %r8
  35597. movq 320(%rdx), %r10
  35598. movq 320(%rsi), %r9
  35599. pextq %rcx, %r10, %r10
  35600. movq %r8, 312(%rdi)
  35601. sbbq %r10, %r9
  35602. movq 328(%rdx), %r8
  35603. movq 328(%rsi), %r10
  35604. pextq %rcx, %r8, %r8
  35605. movq %r9, 320(%rdi)
  35606. sbbq %r8, %r10
  35607. movq 336(%rdx), %r9
  35608. movq 336(%rsi), %r8
  35609. pextq %rcx, %r9, %r9
  35610. movq %r10, 328(%rdi)
  35611. sbbq %r9, %r8
  35612. movq 344(%rdx), %r10
  35613. movq 344(%rsi), %r9
  35614. pextq %rcx, %r10, %r10
  35615. movq %r8, 336(%rdi)
  35616. sbbq %r10, %r9
  35617. movq 352(%rdx), %r8
  35618. movq 352(%rsi), %r10
  35619. pextq %rcx, %r8, %r8
  35620. movq %r9, 344(%rdi)
  35621. sbbq %r8, %r10
  35622. movq 360(%rdx), %r9
  35623. movq 360(%rsi), %r8
  35624. pextq %rcx, %r9, %r9
  35625. movq %r10, 352(%rdi)
  35626. sbbq %r9, %r8
  35627. movq 368(%rdx), %r10
  35628. movq 368(%rsi), %r9
  35629. pextq %rcx, %r10, %r10
  35630. movq %r8, 360(%rdi)
  35631. sbbq %r10, %r9
  35632. movq 376(%rdx), %r8
  35633. movq 376(%rsi), %r10
  35634. pextq %rcx, %r8, %r8
  35635. movq %r9, 368(%rdi)
  35636. sbbq %r8, %r10
  35637. movq 384(%rdx), %r9
  35638. movq 384(%rsi), %r8
  35639. pextq %rcx, %r9, %r9
  35640. movq %r10, 376(%rdi)
  35641. sbbq %r9, %r8
  35642. movq 392(%rdx), %r10
  35643. movq 392(%rsi), %r9
  35644. pextq %rcx, %r10, %r10
  35645. movq %r8, 384(%rdi)
  35646. sbbq %r10, %r9
  35647. movq 400(%rdx), %r8
  35648. movq 400(%rsi), %r10
  35649. pextq %rcx, %r8, %r8
  35650. movq %r9, 392(%rdi)
  35651. sbbq %r8, %r10
  35652. movq 408(%rdx), %r9
  35653. movq 408(%rsi), %r8
  35654. pextq %rcx, %r9, %r9
  35655. movq %r10, 400(%rdi)
  35656. sbbq %r9, %r8
  35657. movq 416(%rdx), %r10
  35658. movq 416(%rsi), %r9
  35659. pextq %rcx, %r10, %r10
  35660. movq %r8, 408(%rdi)
  35661. sbbq %r10, %r9
  35662. movq 424(%rdx), %r8
  35663. movq 424(%rsi), %r10
  35664. pextq %rcx, %r8, %r8
  35665. movq %r9, 416(%rdi)
  35666. sbbq %r8, %r10
  35667. movq 432(%rdx), %r9
  35668. movq 432(%rsi), %r8
  35669. pextq %rcx, %r9, %r9
  35670. movq %r10, 424(%rdi)
  35671. sbbq %r9, %r8
  35672. movq 440(%rdx), %r10
  35673. movq 440(%rsi), %r9
  35674. pextq %rcx, %r10, %r10
  35675. movq %r8, 432(%rdi)
  35676. sbbq %r10, %r9
  35677. movq 448(%rdx), %r8
  35678. movq 448(%rsi), %r10
  35679. pextq %rcx, %r8, %r8
  35680. movq %r9, 440(%rdi)
  35681. sbbq %r8, %r10
  35682. movq 456(%rdx), %r9
  35683. movq 456(%rsi), %r8
  35684. pextq %rcx, %r9, %r9
  35685. movq %r10, 448(%rdi)
  35686. sbbq %r9, %r8
  35687. movq 464(%rdx), %r10
  35688. movq 464(%rsi), %r9
  35689. pextq %rcx, %r10, %r10
  35690. movq %r8, 456(%rdi)
  35691. sbbq %r10, %r9
  35692. movq 472(%rdx), %r8
  35693. movq 472(%rsi), %r10
  35694. pextq %rcx, %r8, %r8
  35695. movq %r9, 464(%rdi)
  35696. sbbq %r8, %r10
  35697. movq 480(%rdx), %r9
  35698. movq 480(%rsi), %r8
  35699. pextq %rcx, %r9, %r9
  35700. movq %r10, 472(%rdi)
  35701. sbbq %r9, %r8
  35702. movq 488(%rdx), %r10
  35703. movq 488(%rsi), %r9
  35704. pextq %rcx, %r10, %r10
  35705. movq %r8, 480(%rdi)
  35706. sbbq %r10, %r9
  35707. movq 496(%rdx), %r8
  35708. movq 496(%rsi), %r10
  35709. pextq %rcx, %r8, %r8
  35710. movq %r9, 488(%rdi)
  35711. sbbq %r8, %r10
  35712. movq 504(%rdx), %r9
  35713. movq 504(%rsi), %r8
  35714. pextq %rcx, %r9, %r9
  35715. movq %r10, 496(%rdi)
  35716. sbbq %r9, %r8
  /* Store the top limb; the store leaves the final borrow in the carry flag. */
  35717. movq %r8, 504(%rdi)
  /* rax = 0 - 0 - borrow: 0 when limb 63 did not borrow, -1 when it did. */
  35718. sbbq $0x00, %rax
  35719. repz retq
  35720. #ifndef __APPLE__
  35721. .size sp_4096_cond_sub_avx2_64,.-sp_4096_cond_sub_avx2_64
  35722. #endif /* __APPLE__ */
  35723. #ifdef HAVE_INTEL_AVX2
  35724. /* Mul a by digit b into r. (r = a * b)
  35725. *
  35726. * r A single precision integer.
  35727. * a A single precision integer.
  35728. * b A single precision digit.
  *
  * As used below: r is addressed through %rdi, a through %rsi, and b
  * arrives in %rdx. a is read as 64 limbs of 64 bits (byte offsets
  * 0..504) and r is written as 65 limbs (byte offsets 0..512), so r must
  * have room for the extra top limb.
  * Scratch registers: %rax, %rcx, %r8, %r9, %r10, %r11.
  *
  * mulxq takes %rdx as its implicit multiplicand and does not modify
  * flags, which lets two independent carry chains run through the whole
  * unrolled sequence: adcxq (carry flag only) adds each low product half
  * into the pending result limb, and adoxq (overflow flag only) adds each
  * high product half into the following limb. %r11 stays zero; it seeds
  * each new limb and folds in the final carry.
  35729. */
  35730. #ifndef __APPLE__
  35731. .text
  35732. .globl sp_4096_mul_d_avx2_64
  35733. .type sp_4096_mul_d_avx2_64,@function
  35734. .align 16
  35735. sp_4096_mul_d_avx2_64:
  35736. #else
  35737. .section __TEXT,__text
  35738. .globl _sp_4096_mul_d_avx2_64
  35739. .p2align 4
  35740. _sp_4096_mul_d_avx2_64:
  35741. #endif /* __APPLE__ */
  /* rax = b. The copy back to rdx just below leaves rdx unchanged, since
   * rdx already holds b on entry. */
  35742. movq %rdx, %rax
  35743. # A[0] * B
  35744. movq %rax, %rdx
  /* r11 = 0; the xor also clears CF and OF before the adcxq/adoxq chains. */
  35745. xorq %r11, %r11
  /* r9 = low half, r10 = high half of a[0] * b; r10 is the pending limb r[1]. */
  35746. mulxq (%rsi), %r9, %r10
  35747. movq %r9, (%rdi)
  35748. # A[1] * B
  /* From here every limb repeats one pattern: rcx = low half and r8 = high
   * half of a[i] * b; the register that is not holding the pending limb is
   * reset to 0 from r11; adcxq adds the low half into the pending limb,
   * which is then stored as r[i]; adoxq adds the high half into the freshly
   * zeroed register, which becomes the pending limb r[i+1]. r9 and r10 swap
   * these two roles on each limb. */
  35749. mulxq 8(%rsi), %rcx, %r8
  35750. movq %r11, %r9
  35751. adcxq %rcx, %r10
  35752. movq %r10, 8(%rdi)
  35753. adoxq %r8, %r9
  35754. # A[2] * B
  35755. mulxq 16(%rsi), %rcx, %r8
  35756. movq %r11, %r10
  35757. adcxq %rcx, %r9
  35758. movq %r9, 16(%rdi)
  35759. adoxq %r8, %r10
  35760. # A[3] * B
  35761. mulxq 24(%rsi), %rcx, %r8
  35762. movq %r11, %r9
  35763. adcxq %rcx, %r10
  35764. movq %r10, 24(%rdi)
  35765. adoxq %r8, %r9
  35766. # A[4] * B
  35767. mulxq 32(%rsi), %rcx, %r8
  35768. movq %r11, %r10
  35769. adcxq %rcx, %r9
  35770. movq %r9, 32(%rdi)
  35771. adoxq %r8, %r10
  35772. # A[5] * B
  35773. mulxq 40(%rsi), %rcx, %r8
  35774. movq %r11, %r9
  35775. adcxq %rcx, %r10
  35776. movq %r10, 40(%rdi)
  35777. adoxq %r8, %r9
  35778. # A[6] * B
  35779. mulxq 48(%rsi), %rcx, %r8
  35780. movq %r11, %r10
  35781. adcxq %rcx, %r9
  35782. movq %r9, 48(%rdi)
  35783. adoxq %r8, %r10
  35784. # A[7] * B
  35785. mulxq 56(%rsi), %rcx, %r8
  35786. movq %r11, %r9
  35787. adcxq %rcx, %r10
  35788. movq %r10, 56(%rdi)
  35789. adoxq %r8, %r9
  35790. # A[8] * B
  35791. mulxq 64(%rsi), %rcx, %r8
  35792. movq %r11, %r10
  35793. adcxq %rcx, %r9
  35794. movq %r9, 64(%rdi)
  35795. adoxq %r8, %r10
  35796. # A[9] * B
  35797. mulxq 72(%rsi), %rcx, %r8
  35798. movq %r11, %r9
  35799. adcxq %rcx, %r10
  35800. movq %r10, 72(%rdi)
  35801. adoxq %r8, %r9
  35802. # A[10] * B
  35803. mulxq 80(%rsi), %rcx, %r8
  35804. movq %r11, %r10
  35805. adcxq %rcx, %r9
  35806. movq %r9, 80(%rdi)
  35807. adoxq %r8, %r10
  35808. # A[11] * B
  35809. mulxq 88(%rsi), %rcx, %r8
  35810. movq %r11, %r9
  35811. adcxq %rcx, %r10
  35812. movq %r10, 88(%rdi)
  35813. adoxq %r8, %r9
  35814. # A[12] * B
  35815. mulxq 96(%rsi), %rcx, %r8
  35816. movq %r11, %r10
  35817. adcxq %rcx, %r9
  35818. movq %r9, 96(%rdi)
  35819. adoxq %r8, %r10
  35820. # A[13] * B
  35821. mulxq 104(%rsi), %rcx, %r8
  35822. movq %r11, %r9
  35823. adcxq %rcx, %r10
  35824. movq %r10, 104(%rdi)
  35825. adoxq %r8, %r9
  35826. # A[14] * B
  35827. mulxq 112(%rsi), %rcx, %r8
  35828. movq %r11, %r10
  35829. adcxq %rcx, %r9
  35830. movq %r9, 112(%rdi)
  35831. adoxq %r8, %r10
  35832. # A[15] * B
  35833. mulxq 120(%rsi), %rcx, %r8
  35834. movq %r11, %r9
  35835. adcxq %rcx, %r10
  35836. movq %r10, 120(%rdi)
  35837. adoxq %r8, %r9
  35838. # A[16] * B
  35839. mulxq 128(%rsi), %rcx, %r8
  35840. movq %r11, %r10
  35841. adcxq %rcx, %r9
  35842. movq %r9, 128(%rdi)
  35843. adoxq %r8, %r10
  35844. # A[17] * B
  35845. mulxq 136(%rsi), %rcx, %r8
  35846. movq %r11, %r9
  35847. adcxq %rcx, %r10
  35848. movq %r10, 136(%rdi)
  35849. adoxq %r8, %r9
  35850. # A[18] * B
  35851. mulxq 144(%rsi), %rcx, %r8
  35852. movq %r11, %r10
  35853. adcxq %rcx, %r9
  35854. movq %r9, 144(%rdi)
  35855. adoxq %r8, %r10
  35856. # A[19] * B
  35857. mulxq 152(%rsi), %rcx, %r8
  35858. movq %r11, %r9
  35859. adcxq %rcx, %r10
  35860. movq %r10, 152(%rdi)
  35861. adoxq %r8, %r9
  35862. # A[20] * B
  35863. mulxq 160(%rsi), %rcx, %r8
  35864. movq %r11, %r10
  35865. adcxq %rcx, %r9
  35866. movq %r9, 160(%rdi)
  35867. adoxq %r8, %r10
  35868. # A[21] * B
  35869. mulxq 168(%rsi), %rcx, %r8
  35870. movq %r11, %r9
  35871. adcxq %rcx, %r10
  35872. movq %r10, 168(%rdi)
  35873. adoxq %r8, %r9
  35874. # A[22] * B
  35875. mulxq 176(%rsi), %rcx, %r8
  35876. movq %r11, %r10
  35877. adcxq %rcx, %r9
  35878. movq %r9, 176(%rdi)
  35879. adoxq %r8, %r10
  35880. # A[23] * B
  35881. mulxq 184(%rsi), %rcx, %r8
  35882. movq %r11, %r9
  35883. adcxq %rcx, %r10
  35884. movq %r10, 184(%rdi)
  35885. adoxq %r8, %r9
  35886. # A[24] * B
  35887. mulxq 192(%rsi), %rcx, %r8
  35888. movq %r11, %r10
  35889. adcxq %rcx, %r9
  35890. movq %r9, 192(%rdi)
  35891. adoxq %r8, %r10
  35892. # A[25] * B
  35893. mulxq 200(%rsi), %rcx, %r8
  35894. movq %r11, %r9
  35895. adcxq %rcx, %r10
  35896. movq %r10, 200(%rdi)
  35897. adoxq %r8, %r9
  35898. # A[26] * B
  35899. mulxq 208(%rsi), %rcx, %r8
  35900. movq %r11, %r10
  35901. adcxq %rcx, %r9
  35902. movq %r9, 208(%rdi)
  35903. adoxq %r8, %r10
  35904. # A[27] * B
  35905. mulxq 216(%rsi), %rcx, %r8
  35906. movq %r11, %r9
  35907. adcxq %rcx, %r10
  35908. movq %r10, 216(%rdi)
  35909. adoxq %r8, %r9
  35910. # A[28] * B
  35911. mulxq 224(%rsi), %rcx, %r8
  35912. movq %r11, %r10
  35913. adcxq %rcx, %r9
  35914. movq %r9, 224(%rdi)
  35915. adoxq %r8, %r10
  35916. # A[29] * B
  35917. mulxq 232(%rsi), %rcx, %r8
  35918. movq %r11, %r9
  35919. adcxq %rcx, %r10
  35920. movq %r10, 232(%rdi)
  35921. adoxq %r8, %r9
  35922. # A[30] * B
  35923. mulxq 240(%rsi), %rcx, %r8
  35924. movq %r11, %r10
  35925. adcxq %rcx, %r9
  35926. movq %r9, 240(%rdi)
  35927. adoxq %r8, %r10
  35928. # A[31] * B
  35929. mulxq 248(%rsi), %rcx, %r8
  35930. movq %r11, %r9
  35931. adcxq %rcx, %r10
  35932. movq %r10, 248(%rdi)
  35933. adoxq %r8, %r9
  35934. # A[32] * B
  35935. mulxq 256(%rsi), %rcx, %r8
  35936. movq %r11, %r10
  35937. adcxq %rcx, %r9
  35938. movq %r9, 256(%rdi)
  35939. adoxq %r8, %r10
  35940. # A[33] * B
  35941. mulxq 264(%rsi), %rcx, %r8
  35942. movq %r11, %r9
  35943. adcxq %rcx, %r10
  35944. movq %r10, 264(%rdi)
  35945. adoxq %r8, %r9
  35946. # A[34] * B
  35947. mulxq 272(%rsi), %rcx, %r8
  35948. movq %r11, %r10
  35949. adcxq %rcx, %r9
  35950. movq %r9, 272(%rdi)
  35951. adoxq %r8, %r10
  35952. # A[35] * B
  35953. mulxq 280(%rsi), %rcx, %r8
  35954. movq %r11, %r9
  35955. adcxq %rcx, %r10
  35956. movq %r10, 280(%rdi)
  35957. adoxq %r8, %r9
  35958. # A[36] * B
  35959. mulxq 288(%rsi), %rcx, %r8
  35960. movq %r11, %r10
  35961. adcxq %rcx, %r9
  35962. movq %r9, 288(%rdi)
  35963. adoxq %r8, %r10
  35964. # A[37] * B
  35965. mulxq 296(%rsi), %rcx, %r8
  35966. movq %r11, %r9
  35967. adcxq %rcx, %r10
  35968. movq %r10, 296(%rdi)
  35969. adoxq %r8, %r9
  35970. # A[38] * B
  35971. mulxq 304(%rsi), %rcx, %r8
  35972. movq %r11, %r10
  35973. adcxq %rcx, %r9
  35974. movq %r9, 304(%rdi)
  35975. adoxq %r8, %r10
  35976. # A[39] * B
  35977. mulxq 312(%rsi), %rcx, %r8
  35978. movq %r11, %r9
  35979. adcxq %rcx, %r10
  35980. movq %r10, 312(%rdi)
  35981. adoxq %r8, %r9
  35982. # A[40] * B
  35983. mulxq 320(%rsi), %rcx, %r8
  35984. movq %r11, %r10
  35985. adcxq %rcx, %r9
  35986. movq %r9, 320(%rdi)
  35987. adoxq %r8, %r10
  35988. # A[41] * B
  35989. mulxq 328(%rsi), %rcx, %r8
  35990. movq %r11, %r9
  35991. adcxq %rcx, %r10
  35992. movq %r10, 328(%rdi)
  35993. adoxq %r8, %r9
  35994. # A[42] * B
  35995. mulxq 336(%rsi), %rcx, %r8
  35996. movq %r11, %r10
  35997. adcxq %rcx, %r9
  35998. movq %r9, 336(%rdi)
  35999. adoxq %r8, %r10
  36000. # A[43] * B
  36001. mulxq 344(%rsi), %rcx, %r8
  36002. movq %r11, %r9
  36003. adcxq %rcx, %r10
  36004. movq %r10, 344(%rdi)
  36005. adoxq %r8, %r9
  36006. # A[44] * B
  36007. mulxq 352(%rsi), %rcx, %r8
  36008. movq %r11, %r10
  36009. adcxq %rcx, %r9
  36010. movq %r9, 352(%rdi)
  36011. adoxq %r8, %r10
  36012. # A[45] * B
  36013. mulxq 360(%rsi), %rcx, %r8
  36014. movq %r11, %r9
  36015. adcxq %rcx, %r10
  36016. movq %r10, 360(%rdi)
  36017. adoxq %r8, %r9
  36018. # A[46] * B
  36019. mulxq 368(%rsi), %rcx, %r8
  36020. movq %r11, %r10
  36021. adcxq %rcx, %r9
  36022. movq %r9, 368(%rdi)
  36023. adoxq %r8, %r10
  36024. # A[47] * B
  36025. mulxq 376(%rsi), %rcx, %r8
  36026. movq %r11, %r9
  36027. adcxq %rcx, %r10
  36028. movq %r10, 376(%rdi)
  36029. adoxq %r8, %r9
  36030. # A[48] * B
  36031. mulxq 384(%rsi), %rcx, %r8
  36032. movq %r11, %r10
  36033. adcxq %rcx, %r9
  36034. movq %r9, 384(%rdi)
  36035. adoxq %r8, %r10
  36036. # A[49] * B
  36037. mulxq 392(%rsi), %rcx, %r8
  36038. movq %r11, %r9
  36039. adcxq %rcx, %r10
  36040. movq %r10, 392(%rdi)
  36041. adoxq %r8, %r9
  36042. # A[50] * B
  36043. mulxq 400(%rsi), %rcx, %r8
  36044. movq %r11, %r10
  36045. adcxq %rcx, %r9
  36046. movq %r9, 400(%rdi)
  36047. adoxq %r8, %r10
  36048. # A[51] * B
  36049. mulxq 408(%rsi), %rcx, %r8
  36050. movq %r11, %r9
  36051. adcxq %rcx, %r10
  36052. movq %r10, 408(%rdi)
  36053. adoxq %r8, %r9
  36054. # A[52] * B
  36055. mulxq 416(%rsi), %rcx, %r8
  36056. movq %r11, %r10
  36057. adcxq %rcx, %r9
  36058. movq %r9, 416(%rdi)
  36059. adoxq %r8, %r10
  36060. # A[53] * B
  36061. mulxq 424(%rsi), %rcx, %r8
  36062. movq %r11, %r9
  36063. adcxq %rcx, %r10
  36064. movq %r10, 424(%rdi)
  36065. adoxq %r8, %r9
  36066. # A[54] * B
  36067. mulxq 432(%rsi), %rcx, %r8
  36068. movq %r11, %r10
  36069. adcxq %rcx, %r9
  36070. movq %r9, 432(%rdi)
  36071. adoxq %r8, %r10
  36072. # A[55] * B
  36073. mulxq 440(%rsi), %rcx, %r8
  36074. movq %r11, %r9
  36075. adcxq %rcx, %r10
  36076. movq %r10, 440(%rdi)
  36077. adoxq %r8, %r9
  36078. # A[56] * B
  36079. mulxq 448(%rsi), %rcx, %r8
  36080. movq %r11, %r10
  36081. adcxq %rcx, %r9
  36082. movq %r9, 448(%rdi)
  36083. adoxq %r8, %r10
  36084. # A[57] * B
  36085. mulxq 456(%rsi), %rcx, %r8
  36086. movq %r11, %r9
  36087. adcxq %rcx, %r10
  36088. movq %r10, 456(%rdi)
  36089. adoxq %r8, %r9
  36090. # A[58] * B
  36091. mulxq 464(%rsi), %rcx, %r8
  36092. movq %r11, %r10
  36093. adcxq %rcx, %r9
  36094. movq %r9, 464(%rdi)
  36095. adoxq %r8, %r10
  36096. # A[59] * B
  36097. mulxq 472(%rsi), %rcx, %r8
  36098. movq %r11, %r9
  36099. adcxq %rcx, %r10
  36100. movq %r10, 472(%rdi)
  36101. adoxq %r8, %r9
  36102. # A[60] * B
  36103. mulxq 480(%rsi), %rcx, %r8
  36104. movq %r11, %r10
  36105. adcxq %rcx, %r9
  36106. movq %r9, 480(%rdi)
  36107. adoxq %r8, %r10
  36108. # A[61] * B
  36109. mulxq 488(%rsi), %rcx, %r8
  36110. movq %r11, %r9
  36111. adcxq %rcx, %r10
  36112. movq %r10, 488(%rdi)
  36113. adoxq %r8, %r9
  36114. # A[62] * B
  36115. mulxq 496(%rsi), %rcx, %r8
  36116. movq %r11, %r10
  36117. adcxq %rcx, %r9
  36118. movq %r9, 496(%rdi)
  36119. adoxq %r8, %r10
  36120. # A[63] * B
  36121. mulxq 504(%rsi), %rcx, %r8
  36122. movq %r11, %r9
  36123. adcxq %rcx, %r10
  36124. adoxq %r8, %r9
  /* Fold the last carry of the adcxq chain into the top limb (r11 is 0). */
  36125. adcxq %r11, %r9
  /* Store r[63] and the extra top limb r[64]. */
  36126. movq %r10, 504(%rdi)
  36127. movq %r9, 512(%rdi)
  36128. repz retq
  36129. #ifndef __APPLE__
  36130. .size sp_4096_mul_d_avx2_64,.-sp_4096_mul_d_avx2_64
  36131. #endif /* __APPLE__ */
  36132. #endif /* HAVE_INTEL_AVX2 */
  36133. /* Compare a with b in constant time.
  36134. *
  36135. * a A single precision integer.
  36136. * b A single precision integer.
  36137. * return -ve, 0 or +ve if a is less than, equal to or greater than b
  36138. * respectively.
  36139. */
  36140. #ifndef __APPLE__
  36141. .text
  36142. .globl sp_4096_cmp_64
  36143. .type sp_4096_cmp_64,@function
  36144. .align 16
  36145. sp_4096_cmp_64:
  36146. #else
  36147. .section __TEXT,__text
  36148. .globl _sp_4096_cmp_64
  36149. .p2align 4
  36150. _sp_4096_cmp_64:
  36151. #endif /* __APPLE__ */
  36152. xorq %rcx, %rcx
  36153. movq $-1, %rdx
  36154. movq $-1, %rax
  36155. movq $0x01, %r8
  36156. movq 504(%rdi), %r9
  36157. movq 504(%rsi), %r10
  36158. andq %rdx, %r9
  36159. andq %rdx, %r10
  36160. subq %r10, %r9
  36161. cmova %r8, %rax
  36162. cmovc %rdx, %rax
  36163. cmovnz %rcx, %rdx
  36164. movq 496(%rdi), %r9
  36165. movq 496(%rsi), %r10
  36166. andq %rdx, %r9
  36167. andq %rdx, %r10
  36168. subq %r10, %r9
  36169. cmova %r8, %rax
  36170. cmovc %rdx, %rax
  36171. cmovnz %rcx, %rdx
  36172. movq 488(%rdi), %r9
  36173. movq 488(%rsi), %r10
  36174. andq %rdx, %r9
  36175. andq %rdx, %r10
  36176. subq %r10, %r9
  36177. cmova %r8, %rax
  36178. cmovc %rdx, %rax
  36179. cmovnz %rcx, %rdx
  36180. movq 480(%rdi), %r9
  36181. movq 480(%rsi), %r10
  36182. andq %rdx, %r9
  36183. andq %rdx, %r10
  36184. subq %r10, %r9
  36185. cmova %r8, %rax
  36186. cmovc %rdx, %rax
  36187. cmovnz %rcx, %rdx
  36188. movq 472(%rdi), %r9
  36189. movq 472(%rsi), %r10
  36190. andq %rdx, %r9
  36191. andq %rdx, %r10
  36192. subq %r10, %r9
  36193. cmova %r8, %rax
  36194. cmovc %rdx, %rax
  36195. cmovnz %rcx, %rdx
  36196. movq 464(%rdi), %r9
  36197. movq 464(%rsi), %r10
  36198. andq %rdx, %r9
  36199. andq %rdx, %r10
  36200. subq %r10, %r9
  36201. cmova %r8, %rax
  36202. cmovc %rdx, %rax
  36203. cmovnz %rcx, %rdx
  36204. movq 456(%rdi), %r9
  36205. movq 456(%rsi), %r10
  36206. andq %rdx, %r9
  36207. andq %rdx, %r10
  36208. subq %r10, %r9
  36209. cmova %r8, %rax
  36210. cmovc %rdx, %rax
  36211. cmovnz %rcx, %rdx
  36212. movq 448(%rdi), %r9
  36213. movq 448(%rsi), %r10
  36214. andq %rdx, %r9
  36215. andq %rdx, %r10
  36216. subq %r10, %r9
  36217. cmova %r8, %rax
  36218. cmovc %rdx, %rax
  36219. cmovnz %rcx, %rdx
  36220. movq 440(%rdi), %r9
  36221. movq 440(%rsi), %r10
  36222. andq %rdx, %r9
  36223. andq %rdx, %r10
  36224. subq %r10, %r9
  36225. cmova %r8, %rax
  36226. cmovc %rdx, %rax
  36227. cmovnz %rcx, %rdx
  36228. movq 432(%rdi), %r9
  36229. movq 432(%rsi), %r10
  36230. andq %rdx, %r9
  36231. andq %rdx, %r10
  36232. subq %r10, %r9
  36233. cmova %r8, %rax
  36234. cmovc %rdx, %rax
  36235. cmovnz %rcx, %rdx
  36236. movq 424(%rdi), %r9
  36237. movq 424(%rsi), %r10
  36238. andq %rdx, %r9
  36239. andq %rdx, %r10
  36240. subq %r10, %r9
  36241. cmova %r8, %rax
  36242. cmovc %rdx, %rax
  36243. cmovnz %rcx, %rdx
  36244. movq 416(%rdi), %r9
  36245. movq 416(%rsi), %r10
  36246. andq %rdx, %r9
  36247. andq %rdx, %r10
  36248. subq %r10, %r9
  36249. cmova %r8, %rax
  36250. cmovc %rdx, %rax
  36251. cmovnz %rcx, %rdx
  36252. movq 408(%rdi), %r9
  36253. movq 408(%rsi), %r10
  36254. andq %rdx, %r9
  36255. andq %rdx, %r10
  36256. subq %r10, %r9
  36257. cmova %r8, %rax
  36258. cmovc %rdx, %rax
  36259. cmovnz %rcx, %rdx
  36260. movq 400(%rdi), %r9
  36261. movq 400(%rsi), %r10
  36262. andq %rdx, %r9
  36263. andq %rdx, %r10
  36264. subq %r10, %r9
  36265. cmova %r8, %rax
  36266. cmovc %rdx, %rax
  36267. cmovnz %rcx, %rdx
  36268. movq 392(%rdi), %r9
  36269. movq 392(%rsi), %r10
  36270. andq %rdx, %r9
  36271. andq %rdx, %r10
  36272. subq %r10, %r9
  36273. cmova %r8, %rax
  36274. cmovc %rdx, %rax
  36275. cmovnz %rcx, %rdx
  36276. movq 384(%rdi), %r9
  36277. movq 384(%rsi), %r10
  36278. andq %rdx, %r9
  36279. andq %rdx, %r10
  36280. subq %r10, %r9
  36281. cmova %r8, %rax
  36282. cmovc %rdx, %rax
  36283. cmovnz %rcx, %rdx
  36284. movq 376(%rdi), %r9
  36285. movq 376(%rsi), %r10
  36286. andq %rdx, %r9
  36287. andq %rdx, %r10
  36288. subq %r10, %r9
  36289. cmova %r8, %rax
  36290. cmovc %rdx, %rax
  36291. cmovnz %rcx, %rdx
  36292. movq 368(%rdi), %r9
  36293. movq 368(%rsi), %r10
  36294. andq %rdx, %r9
  36295. andq %rdx, %r10
  36296. subq %r10, %r9
  36297. cmova %r8, %rax
  36298. cmovc %rdx, %rax
  36299. cmovnz %rcx, %rdx
  36300. movq 360(%rdi), %r9
  36301. movq 360(%rsi), %r10
  36302. andq %rdx, %r9
  36303. andq %rdx, %r10
  36304. subq %r10, %r9
  36305. cmova %r8, %rax
  36306. cmovc %rdx, %rax
  36307. cmovnz %rcx, %rdx
  36308. movq 352(%rdi), %r9
  36309. movq 352(%rsi), %r10
  36310. andq %rdx, %r9
  36311. andq %rdx, %r10
  36312. subq %r10, %r9
  36313. cmova %r8, %rax
  36314. cmovc %rdx, %rax
  36315. cmovnz %rcx, %rdx
  36316. movq 344(%rdi), %r9
  36317. movq 344(%rsi), %r10
  36318. andq %rdx, %r9
  36319. andq %rdx, %r10
  36320. subq %r10, %r9
  36321. cmova %r8, %rax
  36322. cmovc %rdx, %rax
  36323. cmovnz %rcx, %rdx
  36324. movq 336(%rdi), %r9
  36325. movq 336(%rsi), %r10
  36326. andq %rdx, %r9
  36327. andq %rdx, %r10
  36328. subq %r10, %r9
  36329. cmova %r8, %rax
  36330. cmovc %rdx, %rax
  36331. cmovnz %rcx, %rdx
  36332. movq 328(%rdi), %r9
  36333. movq 328(%rsi), %r10
  36334. andq %rdx, %r9
  36335. andq %rdx, %r10
  36336. subq %r10, %r9
  36337. cmova %r8, %rax
  36338. cmovc %rdx, %rax
  36339. cmovnz %rcx, %rdx
  36340. movq 320(%rdi), %r9
  36341. movq 320(%rsi), %r10
  36342. andq %rdx, %r9
  36343. andq %rdx, %r10
  36344. subq %r10, %r9
  36345. cmova %r8, %rax
  36346. cmovc %rdx, %rax
  36347. cmovnz %rcx, %rdx
  36348. movq 312(%rdi), %r9
  36349. movq 312(%rsi), %r10
  36350. andq %rdx, %r9
  36351. andq %rdx, %r10
  36352. subq %r10, %r9
  36353. cmova %r8, %rax
  36354. cmovc %rdx, %rax
  36355. cmovnz %rcx, %rdx
  36356. movq 304(%rdi), %r9
  36357. movq 304(%rsi), %r10
  36358. andq %rdx, %r9
  36359. andq %rdx, %r10
  36360. subq %r10, %r9
  36361. cmova %r8, %rax
  36362. cmovc %rdx, %rax
  36363. cmovnz %rcx, %rdx
  36364. movq 296(%rdi), %r9
  36365. movq 296(%rsi), %r10
  36366. andq %rdx, %r9
  36367. andq %rdx, %r10
  36368. subq %r10, %r9
  36369. cmova %r8, %rax
  36370. cmovc %rdx, %rax
  36371. cmovnz %rcx, %rdx
  36372. movq 288(%rdi), %r9
  36373. movq 288(%rsi), %r10
  36374. andq %rdx, %r9
  36375. andq %rdx, %r10
  36376. subq %r10, %r9
  36377. cmova %r8, %rax
  36378. cmovc %rdx, %rax
  36379. cmovnz %rcx, %rdx
  36380. movq 280(%rdi), %r9
  36381. movq 280(%rsi), %r10
  36382. andq %rdx, %r9
  36383. andq %rdx, %r10
  36384. subq %r10, %r9
  36385. cmova %r8, %rax
  36386. cmovc %rdx, %rax
  36387. cmovnz %rcx, %rdx
  36388. movq 272(%rdi), %r9
  36389. movq 272(%rsi), %r10
  36390. andq %rdx, %r9
  36391. andq %rdx, %r10
  36392. subq %r10, %r9
  36393. cmova %r8, %rax
  36394. cmovc %rdx, %rax
  36395. cmovnz %rcx, %rdx
  36396. movq 264(%rdi), %r9
  36397. movq 264(%rsi), %r10
  36398. andq %rdx, %r9
  36399. andq %rdx, %r10
  36400. subq %r10, %r9
  36401. cmova %r8, %rax
  36402. cmovc %rdx, %rax
  36403. cmovnz %rcx, %rdx
  36404. movq 256(%rdi), %r9
  36405. movq 256(%rsi), %r10
  36406. andq %rdx, %r9
  36407. andq %rdx, %r10
  36408. subq %r10, %r9
  36409. cmova %r8, %rax
  36410. cmovc %rdx, %rax
  36411. cmovnz %rcx, %rdx
  36412. movq 248(%rdi), %r9
  36413. movq 248(%rsi), %r10
  36414. andq %rdx, %r9
  36415. andq %rdx, %r10
  36416. subq %r10, %r9
  36417. cmova %r8, %rax
  36418. cmovc %rdx, %rax
  36419. cmovnz %rcx, %rdx
  36420. movq 240(%rdi), %r9
  36421. movq 240(%rsi), %r10
  36422. andq %rdx, %r9
  36423. andq %rdx, %r10
  36424. subq %r10, %r9
  36425. cmova %r8, %rax
  36426. cmovc %rdx, %rax
  36427. cmovnz %rcx, %rdx
  36428. movq 232(%rdi), %r9
  36429. movq 232(%rsi), %r10
  36430. andq %rdx, %r9
  36431. andq %rdx, %r10
  36432. subq %r10, %r9
  36433. cmova %r8, %rax
  36434. cmovc %rdx, %rax
  36435. cmovnz %rcx, %rdx
  36436. movq 224(%rdi), %r9
  36437. movq 224(%rsi), %r10
  36438. andq %rdx, %r9
  36439. andq %rdx, %r10
  36440. subq %r10, %r9
  36441. cmova %r8, %rax
  36442. cmovc %rdx, %rax
  36443. cmovnz %rcx, %rdx
  36444. movq 216(%rdi), %r9
  36445. movq 216(%rsi), %r10
  36446. andq %rdx, %r9
  36447. andq %rdx, %r10
  36448. subq %r10, %r9
  36449. cmova %r8, %rax
  36450. cmovc %rdx, %rax
  36451. cmovnz %rcx, %rdx
  36452. movq 208(%rdi), %r9
  36453. movq 208(%rsi), %r10
  36454. andq %rdx, %r9
  36455. andq %rdx, %r10
  36456. subq %r10, %r9
  36457. cmova %r8, %rax
  36458. cmovc %rdx, %rax
  36459. cmovnz %rcx, %rdx
  36460. movq 200(%rdi), %r9
  36461. movq 200(%rsi), %r10
  36462. andq %rdx, %r9
  36463. andq %rdx, %r10
  36464. subq %r10, %r9
  36465. cmova %r8, %rax
  36466. cmovc %rdx, %rax
  36467. cmovnz %rcx, %rdx
  36468. movq 192(%rdi), %r9
  36469. movq 192(%rsi), %r10
  36470. andq %rdx, %r9
  36471. andq %rdx, %r10
  36472. subq %r10, %r9
  36473. cmova %r8, %rax
  36474. cmovc %rdx, %rax
  36475. cmovnz %rcx, %rdx
  36476. movq 184(%rdi), %r9
  36477. movq 184(%rsi), %r10
  36478. andq %rdx, %r9
  36479. andq %rdx, %r10
  36480. subq %r10, %r9
  36481. cmova %r8, %rax
  36482. cmovc %rdx, %rax
  36483. cmovnz %rcx, %rdx
  36484. movq 176(%rdi), %r9
  36485. movq 176(%rsi), %r10
  36486. andq %rdx, %r9
  36487. andq %rdx, %r10
  36488. subq %r10, %r9
  36489. cmova %r8, %rax
  36490. cmovc %rdx, %rax
  36491. cmovnz %rcx, %rdx
  36492. movq 168(%rdi), %r9
  36493. movq 168(%rsi), %r10
  36494. andq %rdx, %r9
  36495. andq %rdx, %r10
  36496. subq %r10, %r9
  36497. cmova %r8, %rax
  36498. cmovc %rdx, %rax
  36499. cmovnz %rcx, %rdx
  36500. movq 160(%rdi), %r9
  36501. movq 160(%rsi), %r10
  36502. andq %rdx, %r9
  36503. andq %rdx, %r10
  36504. subq %r10, %r9
  36505. cmova %r8, %rax
  36506. cmovc %rdx, %rax
  36507. cmovnz %rcx, %rdx
  36508. movq 152(%rdi), %r9
  36509. movq 152(%rsi), %r10
  36510. andq %rdx, %r9
  36511. andq %rdx, %r10
  36512. subq %r10, %r9
  36513. cmova %r8, %rax
  36514. cmovc %rdx, %rax
  36515. cmovnz %rcx, %rdx
  36516. movq 144(%rdi), %r9
  36517. movq 144(%rsi), %r10
  36518. andq %rdx, %r9
  36519. andq %rdx, %r10
  36520. subq %r10, %r9
  36521. cmova %r8, %rax
  36522. cmovc %rdx, %rax
  36523. cmovnz %rcx, %rdx
  36524. movq 136(%rdi), %r9
  36525. movq 136(%rsi), %r10
  36526. andq %rdx, %r9
  36527. andq %rdx, %r10
  36528. subq %r10, %r9
  36529. cmova %r8, %rax
  36530. cmovc %rdx, %rax
  36531. cmovnz %rcx, %rdx
  36532. movq 128(%rdi), %r9
  36533. movq 128(%rsi), %r10
  36534. andq %rdx, %r9
  36535. andq %rdx, %r10
  36536. subq %r10, %r9
  36537. cmova %r8, %rax
  36538. cmovc %rdx, %rax
  36539. cmovnz %rcx, %rdx
  36540. movq 120(%rdi), %r9
  36541. movq 120(%rsi), %r10
  36542. andq %rdx, %r9
  36543. andq %rdx, %r10
  36544. subq %r10, %r9
  36545. cmova %r8, %rax
  36546. cmovc %rdx, %rax
  36547. cmovnz %rcx, %rdx
  36548. movq 112(%rdi), %r9
  36549. movq 112(%rsi), %r10
  36550. andq %rdx, %r9
  36551. andq %rdx, %r10
  36552. subq %r10, %r9
  36553. cmova %r8, %rax
  36554. cmovc %rdx, %rax
  36555. cmovnz %rcx, %rdx
  36556. movq 104(%rdi), %r9
  36557. movq 104(%rsi), %r10
  36558. andq %rdx, %r9
  36559. andq %rdx, %r10
  36560. subq %r10, %r9
  36561. cmova %r8, %rax
  36562. cmovc %rdx, %rax
  36563. cmovnz %rcx, %rdx
  36564. movq 96(%rdi), %r9
  36565. movq 96(%rsi), %r10
  36566. andq %rdx, %r9
  36567. andq %rdx, %r10
  36568. subq %r10, %r9
  36569. cmova %r8, %rax
  36570. cmovc %rdx, %rax
  36571. cmovnz %rcx, %rdx
  36572. movq 88(%rdi), %r9
  36573. movq 88(%rsi), %r10
  36574. andq %rdx, %r9
  36575. andq %rdx, %r10
  36576. subq %r10, %r9
  36577. cmova %r8, %rax
  36578. cmovc %rdx, %rax
  36579. cmovnz %rcx, %rdx
  36580. movq 80(%rdi), %r9
  36581. movq 80(%rsi), %r10
  36582. andq %rdx, %r9
  36583. andq %rdx, %r10
  36584. subq %r10, %r9
  36585. cmova %r8, %rax
  36586. cmovc %rdx, %rax
  36587. cmovnz %rcx, %rdx
  36588. movq 72(%rdi), %r9
  36589. movq 72(%rsi), %r10
  36590. andq %rdx, %r9
  36591. andq %rdx, %r10
  36592. subq %r10, %r9
  36593. cmova %r8, %rax
  36594. cmovc %rdx, %rax
  36595. cmovnz %rcx, %rdx
  36596. movq 64(%rdi), %r9
  36597. movq 64(%rsi), %r10
  36598. andq %rdx, %r9
  36599. andq %rdx, %r10
  36600. subq %r10, %r9
  36601. cmova %r8, %rax
  36602. cmovc %rdx, %rax
  36603. cmovnz %rcx, %rdx
  36604. movq 56(%rdi), %r9
  36605. movq 56(%rsi), %r10
  36606. andq %rdx, %r9
  36607. andq %rdx, %r10
  36608. subq %r10, %r9
  36609. cmova %r8, %rax
  36610. cmovc %rdx, %rax
  36611. cmovnz %rcx, %rdx
  36612. movq 48(%rdi), %r9
  36613. movq 48(%rsi), %r10
  36614. andq %rdx, %r9
  36615. andq %rdx, %r10
  36616. subq %r10, %r9
  36617. cmova %r8, %rax
  36618. cmovc %rdx, %rax
  36619. cmovnz %rcx, %rdx
  36620. movq 40(%rdi), %r9
  36621. movq 40(%rsi), %r10
  36622. andq %rdx, %r9
  36623. andq %rdx, %r10
  36624. subq %r10, %r9
  36625. cmova %r8, %rax
  36626. cmovc %rdx, %rax
  36627. cmovnz %rcx, %rdx
  36628. movq 32(%rdi), %r9
  36629. movq 32(%rsi), %r10
  36630. andq %rdx, %r9
  36631. andq %rdx, %r10
  36632. subq %r10, %r9
  36633. cmova %r8, %rax
  36634. cmovc %rdx, %rax
  36635. cmovnz %rcx, %rdx
  36636. movq 24(%rdi), %r9
  36637. movq 24(%rsi), %r10
  36638. andq %rdx, %r9
  36639. andq %rdx, %r10
  36640. subq %r10, %r9
  36641. cmova %r8, %rax
  36642. cmovc %rdx, %rax
  36643. cmovnz %rcx, %rdx
  36644. movq 16(%rdi), %r9
  36645. movq 16(%rsi), %r10
  36646. andq %rdx, %r9
  36647. andq %rdx, %r10
  36648. subq %r10, %r9
  36649. cmova %r8, %rax
  36650. cmovc %rdx, %rax
  36651. cmovnz %rcx, %rdx
  36652. movq 8(%rdi), %r9
  36653. movq 8(%rsi), %r10
  36654. andq %rdx, %r9
  36655. andq %rdx, %r10
  36656. subq %r10, %r9
  36657. cmova %r8, %rax
  36658. cmovc %rdx, %rax
  36659. cmovnz %rcx, %rdx
  36660. movq (%rdi), %r9
  36661. movq (%rsi), %r10
  36662. andq %rdx, %r9
  36663. andq %rdx, %r10
  36664. subq %r10, %r9
  36665. cmova %r8, %rax
  36666. cmovc %rdx, %rax
  36667. cmovnz %rcx, %rdx
  36668. xorq %rdx, %rax
  36669. repz retq
  36670. #ifndef __APPLE__
  36671. .size sp_4096_cmp_64,.-sp_4096_cmp_64
  36672. #endif /* __APPLE__ */
  36673. /* Sub b from a into r. (r = a - b)
  36674. *
  36675. * r A single precision integer.
  36676. * a A single precision integer.
  36677. * b A single precision integer.
         *
         * Works on 64 words of 64 bits each (byte offsets 0..504); the word at
         * offset 0 is the least significant one, since the borrow propagates
         * towards higher offsets.
         *
         * Registers on entry: %rdi = r, %rsi = a, %rdx = b.
         * Result in %rax: 0 when no borrow leaves the top word, all ones (-1)
         * when one does.
         * Modifies %rax, %rcx, %r8 and the flags. No stack is used.
         *
         * The routine is fully unrolled and has no branches. %rcx and %r8
         * alternate as the working register so that the load of word i+1 can
         * sit between the subtract and the store of word i. a[i] and b[i] are
         * both read before r[i] is written.
  36678. */
  36679. #ifndef __APPLE__
  36680. .text
  36681. .globl sp_4096_sub_64
  36682. .type sp_4096_sub_64,@function
  36683. .align 16
  36684. sp_4096_sub_64:
  36685. #else
  36686. .section __TEXT,__text
  36687. .globl _sp_4096_sub_64
  36688. .p2align 4
  36689. _sp_4096_sub_64:
  36690. #endif /* __APPLE__ */
  36691. movq (%rsi), %rcx /* rcx = a[0] */
  36692. xorq %rax, %rax /* rax = 0; must happen before the first subtract because xor resets CF */
  36693. subq (%rdx), %rcx /* a[0] - b[0]; starts the borrow chain in CF */
  36694. movq 8(%rsi), %r8 /* movq leaves the flags alone, so CF survives until the sbbq */
  36695. movq %rcx, (%rdi) /* r[0] */
  36696. sbbq 8(%rdx), %r8 /* a[1] - b[1] - borrow; the same three-step pattern repeats below */
  36697. movq 16(%rsi), %rcx
  36698. movq %r8, 8(%rdi)
  36699. sbbq 16(%rdx), %rcx
  36700. movq 24(%rsi), %r8
  36701. movq %rcx, 16(%rdi)
  36702. sbbq 24(%rdx), %r8
  36703. movq 32(%rsi), %rcx
  36704. movq %r8, 24(%rdi)
  36705. sbbq 32(%rdx), %rcx
  36706. movq 40(%rsi), %r8
  36707. movq %rcx, 32(%rdi)
  36708. sbbq 40(%rdx), %r8
  36709. movq 48(%rsi), %rcx
  36710. movq %r8, 40(%rdi)
  36711. sbbq 48(%rdx), %rcx
  36712. movq 56(%rsi), %r8
  36713. movq %rcx, 48(%rdi)
  36714. sbbq 56(%rdx), %r8
  36715. movq 64(%rsi), %rcx
  36716. movq %r8, 56(%rdi)
  36717. sbbq 64(%rdx), %rcx
  36718. movq 72(%rsi), %r8
  36719. movq %rcx, 64(%rdi)
  36720. sbbq 72(%rdx), %r8
  36721. movq 80(%rsi), %rcx
  36722. movq %r8, 72(%rdi)
  36723. sbbq 80(%rdx), %rcx
  36724. movq 88(%rsi), %r8
  36725. movq %rcx, 80(%rdi)
  36726. sbbq 88(%rdx), %r8
  36727. movq 96(%rsi), %rcx
  36728. movq %r8, 88(%rdi)
  36729. sbbq 96(%rdx), %rcx
  36730. movq 104(%rsi), %r8
  36731. movq %rcx, 96(%rdi)
  36732. sbbq 104(%rdx), %r8
  36733. movq 112(%rsi), %rcx
  36734. movq %r8, 104(%rdi)
  36735. sbbq 112(%rdx), %rcx
  36736. movq 120(%rsi), %r8
  36737. movq %rcx, 112(%rdi)
  36738. sbbq 120(%rdx), %r8
  36739. movq 128(%rsi), %rcx
  36740. movq %r8, 120(%rdi)
  36741. sbbq 128(%rdx), %rcx
  36742. movq 136(%rsi), %r8
  36743. movq %rcx, 128(%rdi)
  36744. sbbq 136(%rdx), %r8
  36745. movq 144(%rsi), %rcx
  36746. movq %r8, 136(%rdi)
  36747. sbbq 144(%rdx), %rcx
  36748. movq 152(%rsi), %r8
  36749. movq %rcx, 144(%rdi)
  36750. sbbq 152(%rdx), %r8
  36751. movq 160(%rsi), %rcx
  36752. movq %r8, 152(%rdi)
  36753. sbbq 160(%rdx), %rcx
  36754. movq 168(%rsi), %r8
  36755. movq %rcx, 160(%rdi)
  36756. sbbq 168(%rdx), %r8
  36757. movq 176(%rsi), %rcx
  36758. movq %r8, 168(%rdi)
  36759. sbbq 176(%rdx), %rcx
  36760. movq 184(%rsi), %r8
  36761. movq %rcx, 176(%rdi)
  36762. sbbq 184(%rdx), %r8
  36763. movq 192(%rsi), %rcx
  36764. movq %r8, 184(%rdi)
  36765. sbbq 192(%rdx), %rcx
  36766. movq 200(%rsi), %r8
  36767. movq %rcx, 192(%rdi)
  36768. sbbq 200(%rdx), %r8
  36769. movq 208(%rsi), %rcx
  36770. movq %r8, 200(%rdi)
  36771. sbbq 208(%rdx), %rcx
  36772. movq 216(%rsi), %r8
  36773. movq %rcx, 208(%rdi)
  36774. sbbq 216(%rdx), %r8
  36775. movq 224(%rsi), %rcx
  36776. movq %r8, 216(%rdi)
  36777. sbbq 224(%rdx), %rcx
  36778. movq 232(%rsi), %r8
  36779. movq %rcx, 224(%rdi)
  36780. sbbq 232(%rdx), %r8
  36781. movq 240(%rsi), %rcx
  36782. movq %r8, 232(%rdi)
  36783. sbbq 240(%rdx), %rcx
  36784. movq 248(%rsi), %r8
  36785. movq %rcx, 240(%rdi)
  36786. sbbq 248(%rdx), %r8
  36787. movq 256(%rsi), %rcx
  36788. movq %r8, 248(%rdi)
  36789. sbbq 256(%rdx), %rcx
  36790. movq 264(%rsi), %r8
  36791. movq %rcx, 256(%rdi)
  36792. sbbq 264(%rdx), %r8
  36793. movq 272(%rsi), %rcx
  36794. movq %r8, 264(%rdi)
  36795. sbbq 272(%rdx), %rcx
  36796. movq 280(%rsi), %r8
  36797. movq %rcx, 272(%rdi)
  36798. sbbq 280(%rdx), %r8
  36799. movq 288(%rsi), %rcx
  36800. movq %r8, 280(%rdi)
  36801. sbbq 288(%rdx), %rcx
  36802. movq 296(%rsi), %r8
  36803. movq %rcx, 288(%rdi)
  36804. sbbq 296(%rdx), %r8
  36805. movq 304(%rsi), %rcx
  36806. movq %r8, 296(%rdi)
  36807. sbbq 304(%rdx), %rcx
  36808. movq 312(%rsi), %r8
  36809. movq %rcx, 304(%rdi)
  36810. sbbq 312(%rdx), %r8
  36811. movq 320(%rsi), %rcx
  36812. movq %r8, 312(%rdi)
  36813. sbbq 320(%rdx), %rcx
  36814. movq 328(%rsi), %r8
  36815. movq %rcx, 320(%rdi)
  36816. sbbq 328(%rdx), %r8
  36817. movq 336(%rsi), %rcx
  36818. movq %r8, 328(%rdi)
  36819. sbbq 336(%rdx), %rcx
  36820. movq 344(%rsi), %r8
  36821. movq %rcx, 336(%rdi)
  36822. sbbq 344(%rdx), %r8
  36823. movq 352(%rsi), %rcx
  36824. movq %r8, 344(%rdi)
  36825. sbbq 352(%rdx), %rcx
  36826. movq 360(%rsi), %r8
  36827. movq %rcx, 352(%rdi)
  36828. sbbq 360(%rdx), %r8
  36829. movq 368(%rsi), %rcx
  36830. movq %r8, 360(%rdi)
  36831. sbbq 368(%rdx), %rcx
  36832. movq 376(%rsi), %r8
  36833. movq %rcx, 368(%rdi)
  36834. sbbq 376(%rdx), %r8
  36835. movq 384(%rsi), %rcx
  36836. movq %r8, 376(%rdi)
  36837. sbbq 384(%rdx), %rcx
  36838. movq 392(%rsi), %r8
  36839. movq %rcx, 384(%rdi)
  36840. sbbq 392(%rdx), %r8
  36841. movq 400(%rsi), %rcx
  36842. movq %r8, 392(%rdi)
  36843. sbbq 400(%rdx), %rcx
  36844. movq 408(%rsi), %r8
  36845. movq %rcx, 400(%rdi)
  36846. sbbq 408(%rdx), %r8
  36847. movq 416(%rsi), %rcx
  36848. movq %r8, 408(%rdi)
  36849. sbbq 416(%rdx), %rcx
  36850. movq 424(%rsi), %r8
  36851. movq %rcx, 416(%rdi)
  36852. sbbq 424(%rdx), %r8
  36853. movq 432(%rsi), %rcx
  36854. movq %r8, 424(%rdi)
  36855. sbbq 432(%rdx), %rcx
  36856. movq 440(%rsi), %r8
  36857. movq %rcx, 432(%rdi)
  36858. sbbq 440(%rdx), %r8
  36859. movq 448(%rsi), %rcx
  36860. movq %r8, 440(%rdi)
  36861. sbbq 448(%rdx), %rcx
  36862. movq 456(%rsi), %r8
  36863. movq %rcx, 448(%rdi)
  36864. sbbq 456(%rdx), %r8
  36865. movq 464(%rsi), %rcx
  36866. movq %r8, 456(%rdi)
  36867. sbbq 464(%rdx), %rcx
  36868. movq 472(%rsi), %r8
  36869. movq %rcx, 464(%rdi)
  36870. sbbq 472(%rdx), %r8
  36871. movq 480(%rsi), %rcx
  36872. movq %r8, 472(%rdi)
  36873. sbbq 480(%rdx), %rcx
  36874. movq 488(%rsi), %r8
  36875. movq %rcx, 480(%rdi)
  36876. sbbq 488(%rdx), %r8
  36877. movq 496(%rsi), %rcx
  36878. movq %r8, 488(%rdi)
  36879. sbbq 496(%rdx), %rcx
  36880. movq 504(%rsi), %r8
  36881. movq %rcx, 496(%rdi)
  36882. sbbq 504(%rdx), %r8 /* top word: a[63] - b[63] - borrow */
  36883. movq %r8, 504(%rdi) /* r[63] */
  36884. sbbq $0x00, %rax /* rax = 0 - 0 - CF: 0 if no final borrow, -1 otherwise */
  36885. repz retq
  36886. #ifndef __APPLE__
  36887. .size sp_4096_sub_64,.-sp_4096_sub_64
  36888. #endif /* __APPLE__ */
  36889. #ifdef HAVE_INTEL_AVX2
  36890. /* Reduce the number back to 4096 bits using Montgomery reduction.
  36891. *
  36892. * a A single precision number to reduce in place.
  36893. * m The single precision number representing the modulus.
  36894. * mp The digit representing the negative inverse of m mod 2^n.
         *
         * Registers on entry: %rdi = a, %rsi = m, %rdx = mp.
         * a is accessed as 128 64-bit words (a[0]..a[127]) and m as 64 words;
         * the word at offset 0 is the least significant. The reduced value is
         * written to a[0..63].
         *
         * Main loop (64 passes, counter in %r9): each pass computes
         * mu = a[i] * mp (low 64 bits) and adds m * mu into a[i..i+64]. The
         * products come from mulx and are accumulated on two independent carry
         * chains: adcx (uses CF) adds the low halves, adox (uses OF) adds the
         * high halves.
         *
         * Register roles inside the loop:
         *   %r8       mp (moved out of %rdx, which mulx needs as multiplicand)
         *   %rdx      mu
         *   %rax/%rcx low/high half of the current product
         *   %r10/%r11 alternating accumulators for the words being updated
         *   %r12      constant zero
         *   %r13      current lowest word a[i]; the updated a[i+1] is left here
         *             for the next pass and is not written back to memory
         *   %r14      carry out of the top word, handed to the next pass
         *   %rdi      a + 0x100 + 8*i, so word a[i+k] sits at displacement
         *             8*k - 256 (presumably to keep displacements small)
         *
         * After the loop %r14 is negated into a mask (this relies on the carry
         * being 0 or 1) and the result is r[j] = a[64+j] - (m[j] & mask) for
         * j = 0..63. pext with that mask acts as a branch-free select: an
         * all-ones mask yields the m word unchanged, a zero mask yields 0.
         * No comparison against m is made here, and the borrow out of the last
         * subtraction is discarded.
         *
         * Uses mulx and pext (BMI2) and adcx/adox (ADX). %r12-%r14 are saved
         * and restored on the stack; %rax, %rcx, %rdx, %r8-%r11 and the flags
         * are modified.
  36895. */
  36896. #ifndef __APPLE__
  36897. .text
  36898. .globl sp_4096_mont_reduce_avx2_64
  36899. .type sp_4096_mont_reduce_avx2_64,@function
  36900. .align 16
  36901. sp_4096_mont_reduce_avx2_64:
  36902. #else
  36903. .section __TEXT,__text
  36904. .globl _sp_4096_mont_reduce_avx2_64
  36905. .p2align 4
  36906. _sp_4096_mont_reduce_avx2_64:
  36907. #endif /* __APPLE__ */
  36908. pushq %r12
  36909. pushq %r13
  36910. pushq %r14
  36911. movq %rdx, %r8 /* r8 = mp; rdx is needed below as the implicit mulx operand */
  36912. xorq %r14, %r14 /* no carry into the first pass */
  36913. # i = 64
  36914. movq $0x40, %r9
  36915. movq (%rdi), %r13 /* r13 = a[0], read before rdi is biased */
  36916. addq $0x100, %rdi /* bias the pointer: a[i+k] is now at 8*k-256(%rdi) */
  36917. xorq %r12, %r12
  36918. L_mont_loop_avx2_64:
  36919. # mu = a[i] * mp
  36920. movq %r13, %rdx
  36921. movq %r13, %r10 /* r10 = a[i] */
  36922. imulq %r8, %rdx /* rdx = mu (low 64 bits of a[i] * mp) */
  36923. xorq %r12, %r12 /* r12 = 0 and CF = OF = 0, so both carry chains start clean */
  36924. # a[i+0] += m[0] * mu
  36925. mulxq (%rsi), %rax, %rcx
  36926. movq -248(%rdi), %r13
  36927. adcxq %rax, %r10 /* sum is never stored (r10 is reloaded below); only CF is kept */
  36928. adoxq %rcx, %r13
  36929. # a[i+1] += m[1] * mu
  36930. mulxq 8(%rsi), %rax, %rcx
  36931. movq -240(%rdi), %r10
  36932. adcxq %rax, %r13 /* r13 = updated a[i+1]; stays in the register as next pass's a[i] */
  36933. adoxq %rcx, %r10
  36934. # a[i+2] += m[2] * mu
  36935. mulxq 16(%rsi), %rax, %rcx
  36936. movq -232(%rdi), %r11
  36937. adcxq %rax, %r10
  36938. adoxq %rcx, %r11
  36939. movq %r10, -240(%rdi)
  36940. # a[i+3] += m[3] * mu
  36941. mulxq 24(%rsi), %rax, %rcx
  36942. movq -224(%rdi), %r10
  36943. adcxq %rax, %r11
  36944. adoxq %rcx, %r10
  36945. movq %r11, -232(%rdi)
  36946. # a[i+4] += m[4] * mu
  36947. mulxq 32(%rsi), %rax, %rcx
  36948. movq -216(%rdi), %r11
  36949. adcxq %rax, %r10
  36950. adoxq %rcx, %r11
  36951. movq %r10, -224(%rdi)
  36952. # a[i+5] += m[5] * mu
  36953. mulxq 40(%rsi), %rax, %rcx
  36954. movq -208(%rdi), %r10
  36955. adcxq %rax, %r11
  36956. adoxq %rcx, %r10
  36957. movq %r11, -216(%rdi)
  36958. # a[i+6] += m[6] * mu
  36959. mulxq 48(%rsi), %rax, %rcx
  36960. movq -200(%rdi), %r11
  36961. adcxq %rax, %r10
  36962. adoxq %rcx, %r11
  36963. movq %r10, -208(%rdi)
  36964. # a[i+7] += m[7] * mu
  36965. mulxq 56(%rsi), %rax, %rcx
  36966. movq -192(%rdi), %r10
  36967. adcxq %rax, %r11
  36968. adoxq %rcx, %r10
  36969. movq %r11, -200(%rdi)
  36970. # a[i+8] += m[8] * mu
  36971. mulxq 64(%rsi), %rax, %rcx
  36972. movq -184(%rdi), %r11
  36973. adcxq %rax, %r10
  36974. adoxq %rcx, %r11
  36975. movq %r10, -192(%rdi)
  36976. # a[i+9] += m[9] * mu
  36977. mulxq 72(%rsi), %rax, %rcx
  36978. movq -176(%rdi), %r10
  36979. adcxq %rax, %r11
  36980. adoxq %rcx, %r10
  36981. movq %r11, -184(%rdi)
  36982. # a[i+10] += m[10] * mu
  36983. mulxq 80(%rsi), %rax, %rcx
  36984. movq -168(%rdi), %r11
  36985. adcxq %rax, %r10
  36986. adoxq %rcx, %r11
  36987. movq %r10, -176(%rdi)
  36988. # a[i+11] += m[11] * mu
  36989. mulxq 88(%rsi), %rax, %rcx
  36990. movq -160(%rdi), %r10
  36991. adcxq %rax, %r11
  36992. adoxq %rcx, %r10
  36993. movq %r11, -168(%rdi)
  36994. # a[i+12] += m[12] * mu
  36995. mulxq 96(%rsi), %rax, %rcx
  36996. movq -152(%rdi), %r11
  36997. adcxq %rax, %r10
  36998. adoxq %rcx, %r11
  36999. movq %r10, -160(%rdi)
  37000. # a[i+13] += m[13] * mu
  37001. mulxq 104(%rsi), %rax, %rcx
  37002. movq -144(%rdi), %r10
  37003. adcxq %rax, %r11
  37004. adoxq %rcx, %r10
  37005. movq %r11, -152(%rdi)
  37006. # a[i+14] += m[14] * mu
  37007. mulxq 112(%rsi), %rax, %rcx
  37008. movq -136(%rdi), %r11
  37009. adcxq %rax, %r10
  37010. adoxq %rcx, %r11
  37011. movq %r10, -144(%rdi)
  37012. # a[i+15] += m[15] * mu
  37013. mulxq 120(%rsi), %rax, %rcx
  37014. movq -128(%rdi), %r10
  37015. adcxq %rax, %r11
  37016. adoxq %rcx, %r10
  37017. movq %r11, -136(%rdi)
  37018. # a[i+16] += m[16] * mu
  37019. mulxq 128(%rsi), %rax, %rcx
  37020. movq -120(%rdi), %r11
  37021. adcxq %rax, %r10
  37022. adoxq %rcx, %r11
  37023. movq %r10, -128(%rdi)
  37024. # a[i+17] += m[17] * mu
  37025. mulxq 136(%rsi), %rax, %rcx
  37026. movq -112(%rdi), %r10
  37027. adcxq %rax, %r11
  37028. adoxq %rcx, %r10
  37029. movq %r11, -120(%rdi)
  37030. # a[i+18] += m[18] * mu
  37031. mulxq 144(%rsi), %rax, %rcx
  37032. movq -104(%rdi), %r11
  37033. adcxq %rax, %r10
  37034. adoxq %rcx, %r11
  37035. movq %r10, -112(%rdi)
  37036. # a[i+19] += m[19] * mu
  37037. mulxq 152(%rsi), %rax, %rcx
  37038. movq -96(%rdi), %r10
  37039. adcxq %rax, %r11
  37040. adoxq %rcx, %r10
  37041. movq %r11, -104(%rdi)
  37042. # a[i+20] += m[20] * mu
  37043. mulxq 160(%rsi), %rax, %rcx
  37044. movq -88(%rdi), %r11
  37045. adcxq %rax, %r10
  37046. adoxq %rcx, %r11
  37047. movq %r10, -96(%rdi)
  37048. # a[i+21] += m[21] * mu
  37049. mulxq 168(%rsi), %rax, %rcx
  37050. movq -80(%rdi), %r10
  37051. adcxq %rax, %r11
  37052. adoxq %rcx, %r10
  37053. movq %r11, -88(%rdi)
  37054. # a[i+22] += m[22] * mu
  37055. mulxq 176(%rsi), %rax, %rcx
  37056. movq -72(%rdi), %r11
  37057. adcxq %rax, %r10
  37058. adoxq %rcx, %r11
  37059. movq %r10, -80(%rdi)
  37060. # a[i+23] += m[23] * mu
  37061. mulxq 184(%rsi), %rax, %rcx
  37062. movq -64(%rdi), %r10
  37063. adcxq %rax, %r11
  37064. adoxq %rcx, %r10
  37065. movq %r11, -72(%rdi)
  37066. # a[i+24] += m[24] * mu
  37067. mulxq 192(%rsi), %rax, %rcx
  37068. movq -56(%rdi), %r11
  37069. adcxq %rax, %r10
  37070. adoxq %rcx, %r11
  37071. movq %r10, -64(%rdi)
  37072. # a[i+25] += m[25] * mu
  37073. mulxq 200(%rsi), %rax, %rcx
  37074. movq -48(%rdi), %r10
  37075. adcxq %rax, %r11
  37076. adoxq %rcx, %r10
  37077. movq %r11, -56(%rdi)
  37078. # a[i+26] += m[26] * mu
  37079. mulxq 208(%rsi), %rax, %rcx
  37080. movq -40(%rdi), %r11
  37081. adcxq %rax, %r10
  37082. adoxq %rcx, %r11
  37083. movq %r10, -48(%rdi)
  37084. # a[i+27] += m[27] * mu
  37085. mulxq 216(%rsi), %rax, %rcx
  37086. movq -32(%rdi), %r10
  37087. adcxq %rax, %r11
  37088. adoxq %rcx, %r10
  37089. movq %r11, -40(%rdi)
  37090. # a[i+28] += m[28] * mu
  37091. mulxq 224(%rsi), %rax, %rcx
  37092. movq -24(%rdi), %r11
  37093. adcxq %rax, %r10
  37094. adoxq %rcx, %r11
  37095. movq %r10, -32(%rdi)
  37096. # a[i+29] += m[29] * mu
  37097. mulxq 232(%rsi), %rax, %rcx
  37098. movq -16(%rdi), %r10
  37099. adcxq %rax, %r11
  37100. adoxq %rcx, %r10
  37101. movq %r11, -24(%rdi)
  37102. # a[i+30] += m[30] * mu
  37103. mulxq 240(%rsi), %rax, %rcx
  37104. movq -8(%rdi), %r11
  37105. adcxq %rax, %r10
  37106. adoxq %rcx, %r11
  37107. movq %r10, -16(%rdi)
  37108. # a[i+31] += m[31] * mu
  37109. mulxq 248(%rsi), %rax, %rcx
  37110. movq (%rdi), %r10
  37111. adcxq %rax, %r11
  37112. adoxq %rcx, %r10
  37113. movq %r11, -8(%rdi)
  37114. # a[i+32] += m[32] * mu
  37115. mulxq 256(%rsi), %rax, %rcx
  37116. movq 8(%rdi), %r11
  37117. adcxq %rax, %r10
  37118. adoxq %rcx, %r11
  37119. movq %r10, (%rdi)
  37120. # a[i+33] += m[33] * mu
  37121. mulxq 264(%rsi), %rax, %rcx
  37122. movq 16(%rdi), %r10
  37123. adcxq %rax, %r11
  37124. adoxq %rcx, %r10
  37125. movq %r11, 8(%rdi)
  37126. # a[i+34] += m[34] * mu
  37127. mulxq 272(%rsi), %rax, %rcx
  37128. movq 24(%rdi), %r11
  37129. adcxq %rax, %r10
  37130. adoxq %rcx, %r11
  37131. movq %r10, 16(%rdi)
  37132. # a[i+35] += m[35] * mu
  37133. mulxq 280(%rsi), %rax, %rcx
  37134. movq 32(%rdi), %r10
  37135. adcxq %rax, %r11
  37136. adoxq %rcx, %r10
  37137. movq %r11, 24(%rdi)
  37138. # a[i+36] += m[36] * mu
  37139. mulxq 288(%rsi), %rax, %rcx
  37140. movq 40(%rdi), %r11
  37141. adcxq %rax, %r10
  37142. adoxq %rcx, %r11
  37143. movq %r10, 32(%rdi)
  37144. # a[i+37] += m[37] * mu
  37145. mulxq 296(%rsi), %rax, %rcx
  37146. movq 48(%rdi), %r10
  37147. adcxq %rax, %r11
  37148. adoxq %rcx, %r10
  37149. movq %r11, 40(%rdi)
  37150. # a[i+38] += m[38] * mu
  37151. mulxq 304(%rsi), %rax, %rcx
  37152. movq 56(%rdi), %r11
  37153. adcxq %rax, %r10
  37154. adoxq %rcx, %r11
  37155. movq %r10, 48(%rdi)
  37156. # a[i+39] += m[39] * mu
  37157. mulxq 312(%rsi), %rax, %rcx
  37158. movq 64(%rdi), %r10
  37159. adcxq %rax, %r11
  37160. adoxq %rcx, %r10
  37161. movq %r11, 56(%rdi)
  37162. # a[i+40] += m[40] * mu
  37163. mulxq 320(%rsi), %rax, %rcx
  37164. movq 72(%rdi), %r11
  37165. adcxq %rax, %r10
  37166. adoxq %rcx, %r11
  37167. movq %r10, 64(%rdi)
  37168. # a[i+41] += m[41] * mu
  37169. mulxq 328(%rsi), %rax, %rcx
  37170. movq 80(%rdi), %r10
  37171. adcxq %rax, %r11
  37172. adoxq %rcx, %r10
  37173. movq %r11, 72(%rdi)
  37174. # a[i+42] += m[42] * mu
  37175. mulxq 336(%rsi), %rax, %rcx
  37176. movq 88(%rdi), %r11
  37177. adcxq %rax, %r10
  37178. adoxq %rcx, %r11
  37179. movq %r10, 80(%rdi)
  37180. # a[i+43] += m[43] * mu
  37181. mulxq 344(%rsi), %rax, %rcx
  37182. movq 96(%rdi), %r10
  37183. adcxq %rax, %r11
  37184. adoxq %rcx, %r10
  37185. movq %r11, 88(%rdi)
  37186. # a[i+44] += m[44] * mu
  37187. mulxq 352(%rsi), %rax, %rcx
  37188. movq 104(%rdi), %r11
  37189. adcxq %rax, %r10
  37190. adoxq %rcx, %r11
  37191. movq %r10, 96(%rdi)
  37192. # a[i+45] += m[45] * mu
  37193. mulxq 360(%rsi), %rax, %rcx
  37194. movq 112(%rdi), %r10
  37195. adcxq %rax, %r11
  37196. adoxq %rcx, %r10
  37197. movq %r11, 104(%rdi)
  37198. # a[i+46] += m[46] * mu
  37199. mulxq 368(%rsi), %rax, %rcx
  37200. movq 120(%rdi), %r11
  37201. adcxq %rax, %r10
  37202. adoxq %rcx, %r11
  37203. movq %r10, 112(%rdi)
  37204. # a[i+47] += m[47] * mu
  37205. mulxq 376(%rsi), %rax, %rcx
  37206. movq 128(%rdi), %r10
  37207. adcxq %rax, %r11
  37208. adoxq %rcx, %r10
  37209. movq %r11, 120(%rdi)
  37210. # a[i+48] += m[48] * mu
  37211. mulxq 384(%rsi), %rax, %rcx
  37212. movq 136(%rdi), %r11
  37213. adcxq %rax, %r10
  37214. adoxq %rcx, %r11
  37215. movq %r10, 128(%rdi)
  37216. # a[i+49] += m[49] * mu
  37217. mulxq 392(%rsi), %rax, %rcx
  37218. movq 144(%rdi), %r10
  37219. adcxq %rax, %r11
  37220. adoxq %rcx, %r10
  37221. movq %r11, 136(%rdi)
  37222. # a[i+50] += m[50] * mu
  37223. mulxq 400(%rsi), %rax, %rcx
  37224. movq 152(%rdi), %r11
  37225. adcxq %rax, %r10
  37226. adoxq %rcx, %r11
  37227. movq %r10, 144(%rdi)
  37228. # a[i+51] += m[51] * mu
  37229. mulxq 408(%rsi), %rax, %rcx
  37230. movq 160(%rdi), %r10
  37231. adcxq %rax, %r11
  37232. adoxq %rcx, %r10
  37233. movq %r11, 152(%rdi)
  37234. # a[i+52] += m[52] * mu
  37235. mulxq 416(%rsi), %rax, %rcx
  37236. movq 168(%rdi), %r11
  37237. adcxq %rax, %r10
  37238. adoxq %rcx, %r11
  37239. movq %r10, 160(%rdi)
  37240. # a[i+53] += m[53] * mu
  37241. mulxq 424(%rsi), %rax, %rcx
  37242. movq 176(%rdi), %r10
  37243. adcxq %rax, %r11
  37244. adoxq %rcx, %r10
  37245. movq %r11, 168(%rdi)
  37246. # a[i+54] += m[54] * mu
  37247. mulxq 432(%rsi), %rax, %rcx
  37248. movq 184(%rdi), %r11
  37249. adcxq %rax, %r10
  37250. adoxq %rcx, %r11
  37251. movq %r10, 176(%rdi)
  37252. # a[i+55] += m[55] * mu
  37253. mulxq 440(%rsi), %rax, %rcx
  37254. movq 192(%rdi), %r10
  37255. adcxq %rax, %r11
  37256. adoxq %rcx, %r10
  37257. movq %r11, 184(%rdi)
  37258. # a[i+56] += m[56] * mu
  37259. mulxq 448(%rsi), %rax, %rcx
  37260. movq 200(%rdi), %r11
  37261. adcxq %rax, %r10
  37262. adoxq %rcx, %r11
  37263. movq %r10, 192(%rdi)
  37264. # a[i+57] += m[57] * mu
  37265. mulxq 456(%rsi), %rax, %rcx
  37266. movq 208(%rdi), %r10
  37267. adcxq %rax, %r11
  37268. adoxq %rcx, %r10
  37269. movq %r11, 200(%rdi)
  37270. # a[i+58] += m[58] * mu
  37271. mulxq 464(%rsi), %rax, %rcx
  37272. movq 216(%rdi), %r11
  37273. adcxq %rax, %r10
  37274. adoxq %rcx, %r11
  37275. movq %r10, 208(%rdi)
  37276. # a[i+59] += m[59] * mu
  37277. mulxq 472(%rsi), %rax, %rcx
  37278. movq 224(%rdi), %r10
  37279. adcxq %rax, %r11
  37280. adoxq %rcx, %r10
  37281. movq %r11, 216(%rdi)
  37282. # a[i+60] += m[60] * mu
  37283. mulxq 480(%rsi), %rax, %rcx
  37284. movq 232(%rdi), %r11
  37285. adcxq %rax, %r10
  37286. adoxq %rcx, %r11
  37287. movq %r10, 224(%rdi)
  37288. # a[i+61] += m[61] * mu
  37289. mulxq 488(%rsi), %rax, %rcx
  37290. movq 240(%rdi), %r10
  37291. adcxq %rax, %r11
  37292. adoxq %rcx, %r10
  37293. movq %r11, 232(%rdi)
  37294. # a[i+62] += m[62] * mu
  37295. mulxq 496(%rsi), %rax, %rcx
  37296. movq 248(%rdi), %r11
  37297. adcxq %rax, %r10
  37298. adoxq %rcx, %r11
  37299. movq %r10, 240(%rdi)
  37300. # a[i+63] += m[63] * mu
  37301. mulxq 504(%rsi), %rax, %rcx
  37302. movq 256(%rdi), %r10
  37303. adcxq %rax, %r11
  37304. adoxq %rcx, %r10
  37305. movq %r11, 248(%rdi)
  37306. adcxq %r14, %r10 /* a[i+64] += carry saved by the previous pass + CF */
  37307. movq %r10, 256(%rdi)
  37308. movq %r12, %r14 /* r14 = 0; movq leaves CF and OF intact */
  37309. adoxq %r12, %r14 /* r14 += OF */
  37310. adcxq %r12, %r14 /* r14 += CF: carry out of a[i+64] for the next pass */
  37311. # a += 1
  37312. addq $8, %rdi
  37313. # i -= 1
  37314. subq $0x01, %r9
  37315. jnz L_mont_loop_avx2_64
  37316. subq $0x100, %rdi /* undo the bias: rdi = original a + 0x200 = &a[64] */
  37317. negq %r14 /* carry 1 becomes an all-ones mask, carry 0 stays 0 */
  37318. movq %rdi, %r8 /* r8 = &a[64]: source of the final subtraction */
  37319. subq $0x200, %rdi /* rdi = &a[0]: destination */
  37320. movq (%rsi), %rcx
  37321. movq %r13, %rdx /* word 0 is taken from r13 because the loop never stored it */
  37322. pextq %r14, %rcx, %rcx /* rcx = m[0] when the mask is all ones, 0 when it is 0 */
  37323. subq %rcx, %rdx /* starts the borrow chain; movq and pextq below leave CF alone */
  37324. movq 8(%rsi), %rcx
  37325. movq 8(%r8), %rax
  37326. pextq %r14, %rcx, %rcx
  37327. movq %rdx, (%rdi)
  37328. sbbq %rcx, %rax
  37329. movq 16(%rsi), %rdx
  37330. movq 16(%r8), %rcx
  37331. pextq %r14, %rdx, %rdx
  37332. movq %rax, 8(%rdi)
  37333. sbbq %rdx, %rcx
  37334. movq 24(%rsi), %rax
  37335. movq 24(%r8), %rdx
  37336. pextq %r14, %rax, %rax
  37337. movq %rcx, 16(%rdi)
  37338. sbbq %rax, %rdx
  37339. movq 32(%rsi), %rcx
  37340. movq 32(%r8), %rax
  37341. pextq %r14, %rcx, %rcx
  37342. movq %rdx, 24(%rdi)
  37343. sbbq %rcx, %rax
  37344. movq 40(%rsi), %rdx
  37345. movq 40(%r8), %rcx
  37346. pextq %r14, %rdx, %rdx
  37347. movq %rax, 32(%rdi)
  37348. sbbq %rdx, %rcx
  37349. movq 48(%rsi), %rax
  37350. movq 48(%r8), %rdx
  37351. pextq %r14, %rax, %rax
  37352. movq %rcx, 40(%rdi)
  37353. sbbq %rax, %rdx
  37354. movq 56(%rsi), %rcx
  37355. movq 56(%r8), %rax
  37356. pextq %r14, %rcx, %rcx
  37357. movq %rdx, 48(%rdi)
  37358. sbbq %rcx, %rax
  37359. movq 64(%rsi), %rdx
  37360. movq 64(%r8), %rcx
  37361. pextq %r14, %rdx, %rdx
  37362. movq %rax, 56(%rdi)
  37363. sbbq %rdx, %rcx
  37364. movq 72(%rsi), %rax
  37365. movq 72(%r8), %rdx
  37366. pextq %r14, %rax, %rax
  37367. movq %rcx, 64(%rdi)
  37368. sbbq %rax, %rdx
  37369. movq 80(%rsi), %rcx
  37370. movq 80(%r8), %rax
  37371. pextq %r14, %rcx, %rcx
  37372. movq %rdx, 72(%rdi)
  37373. sbbq %rcx, %rax
  37374. movq 88(%rsi), %rdx
  37375. movq 88(%r8), %rcx
  37376. pextq %r14, %rdx, %rdx
  37377. movq %rax, 80(%rdi)
  37378. sbbq %rdx, %rcx
  37379. movq 96(%rsi), %rax
  37380. movq 96(%r8), %rdx
  37381. pextq %r14, %rax, %rax
  37382. movq %rcx, 88(%rdi)
  37383. sbbq %rax, %rdx
  37384. movq 104(%rsi), %rcx
  37385. movq 104(%r8), %rax
  37386. pextq %r14, %rcx, %rcx
  37387. movq %rdx, 96(%rdi)
  37388. sbbq %rcx, %rax
  37389. movq 112(%rsi), %rdx
  37390. movq 112(%r8), %rcx
  37391. pextq %r14, %rdx, %rdx
  37392. movq %rax, 104(%rdi)
  37393. sbbq %rdx, %rcx
  37394. movq 120(%rsi), %rax
  37395. movq 120(%r8), %rdx
  37396. pextq %r14, %rax, %rax
  37397. movq %rcx, 112(%rdi)
  37398. sbbq %rax, %rdx
  37399. movq 128(%rsi), %rcx
  37400. movq 128(%r8), %rax
  37401. pextq %r14, %rcx, %rcx
  37402. movq %rdx, 120(%rdi)
  37403. sbbq %rcx, %rax
  37404. movq 136(%rsi), %rdx
  37405. movq 136(%r8), %rcx
  37406. pextq %r14, %rdx, %rdx
  37407. movq %rax, 128(%rdi)
  37408. sbbq %rdx, %rcx
  37409. movq 144(%rsi), %rax
  37410. movq 144(%r8), %rdx
  37411. pextq %r14, %rax, %rax
  37412. movq %rcx, 136(%rdi)
  37413. sbbq %rax, %rdx
  37414. movq 152(%rsi), %rcx
  37415. movq 152(%r8), %rax
  37416. pextq %r14, %rcx, %rcx
  37417. movq %rdx, 144(%rdi)
  37418. sbbq %rcx, %rax
  37419. movq 160(%rsi), %rdx
  37420. movq 160(%r8), %rcx
  37421. pextq %r14, %rdx, %rdx
  37422. movq %rax, 152(%rdi)
  37423. sbbq %rdx, %rcx
  37424. movq 168(%rsi), %rax
  37425. movq 168(%r8), %rdx
  37426. pextq %r14, %rax, %rax
  37427. movq %rcx, 160(%rdi)
  37428. sbbq %rax, %rdx
  37429. movq 176(%rsi), %rcx
  37430. movq 176(%r8), %rax
  37431. pextq %r14, %rcx, %rcx
  37432. movq %rdx, 168(%rdi)
  37433. sbbq %rcx, %rax
  37434. movq 184(%rsi), %rdx
  37435. movq 184(%r8), %rcx
  37436. pextq %r14, %rdx, %rdx
  37437. movq %rax, 176(%rdi)
  37438. sbbq %rdx, %rcx
  37439. movq 192(%rsi), %rax
  37440. movq 192(%r8), %rdx
  37441. pextq %r14, %rax, %rax
  37442. movq %rcx, 184(%rdi)
  37443. sbbq %rax, %rdx
  37444. movq 200(%rsi), %rcx
  37445. movq 200(%r8), %rax
  37446. pextq %r14, %rcx, %rcx
  37447. movq %rdx, 192(%rdi)
  37448. sbbq %rcx, %rax
  37449. movq 208(%rsi), %rdx
  37450. movq 208(%r8), %rcx
  37451. pextq %r14, %rdx, %rdx
  37452. movq %rax, 200(%rdi)
  37453. sbbq %rdx, %rcx
  37454. movq 216(%rsi), %rax
  37455. movq 216(%r8), %rdx
  37456. pextq %r14, %rax, %rax
  37457. movq %rcx, 208(%rdi)
  37458. sbbq %rax, %rdx
  37459. movq 224(%rsi), %rcx
  37460. movq 224(%r8), %rax
  37461. pextq %r14, %rcx, %rcx
  37462. movq %rdx, 216(%rdi)
  37463. sbbq %rcx, %rax
  37464. movq 232(%rsi), %rdx
  37465. movq 232(%r8), %rcx
  37466. pextq %r14, %rdx, %rdx
  37467. movq %rax, 224(%rdi)
  37468. sbbq %rdx, %rcx
  37469. movq 240(%rsi), %rax
  37470. movq 240(%r8), %rdx
  37471. pextq %r14, %rax, %rax
  37472. movq %rcx, 232(%rdi)
  37473. sbbq %rax, %rdx
  37474. movq 248(%rsi), %rcx
  37475. movq 248(%r8), %rax
  37476. pextq %r14, %rcx, %rcx
  37477. movq %rdx, 240(%rdi)
  37478. sbbq %rcx, %rax
  37479. movq 256(%rsi), %rdx
  37480. movq 256(%r8), %rcx
  37481. pextq %r14, %rdx, %rdx
  37482. movq %rax, 248(%rdi)
  37483. sbbq %rdx, %rcx
  37484. movq 264(%rsi), %rax
  37485. movq 264(%r8), %rdx
  37486. pextq %r14, %rax, %rax
  37487. movq %rcx, 256(%rdi)
  37488. sbbq %rax, %rdx
  37489. movq 272(%rsi), %rcx
  37490. movq 272(%r8), %rax
  37491. pextq %r14, %rcx, %rcx
  37492. movq %rdx, 264(%rdi)
  37493. sbbq %rcx, %rax
  37494. movq 280(%rsi), %rdx
  37495. movq 280(%r8), %rcx
  37496. pextq %r14, %rdx, %rdx
  37497. movq %rax, 272(%rdi)
  37498. sbbq %rdx, %rcx
  37499. movq 288(%rsi), %rax
  37500. movq 288(%r8), %rdx
  37501. pextq %r14, %rax, %rax
  37502. movq %rcx, 280(%rdi)
  37503. sbbq %rax, %rdx
  37504. movq 296(%rsi), %rcx
  37505. movq 296(%r8), %rax
  37506. pextq %r14, %rcx, %rcx
  37507. movq %rdx, 288(%rdi)
  37508. sbbq %rcx, %rax
  37509. movq 304(%rsi), %rdx
  37510. movq 304(%r8), %rcx
  37511. pextq %r14, %rdx, %rdx
  37512. movq %rax, 296(%rdi)
  37513. sbbq %rdx, %rcx
  37514. movq 312(%rsi), %rax
  37515. movq 312(%r8), %rdx
  37516. pextq %r14, %rax, %rax
  37517. movq %rcx, 304(%rdi)
  37518. sbbq %rax, %rdx
  37519. movq 320(%rsi), %rcx
  37520. movq 320(%r8), %rax
  37521. pextq %r14, %rcx, %rcx
  37522. movq %rdx, 312(%rdi)
  37523. sbbq %rcx, %rax
  37524. movq 328(%rsi), %rdx
  37525. movq 328(%r8), %rcx
  37526. pextq %r14, %rdx, %rdx
  37527. movq %rax, 320(%rdi)
  37528. sbbq %rdx, %rcx
  37529. movq 336(%rsi), %rax
  37530. movq 336(%r8), %rdx
  37531. pextq %r14, %rax, %rax
  37532. movq %rcx, 328(%rdi)
  37533. sbbq %rax, %rdx
  37534. movq 344(%rsi), %rcx
  37535. movq 344(%r8), %rax
  37536. pextq %r14, %rcx, %rcx
  37537. movq %rdx, 336(%rdi)
  37538. sbbq %rcx, %rax
  37539. movq 352(%rsi), %rdx
  37540. movq 352(%r8), %rcx
  37541. pextq %r14, %rdx, %rdx
  37542. movq %rax, 344(%rdi)
  37543. sbbq %rdx, %rcx
  37544. movq 360(%rsi), %rax
  37545. movq 360(%r8), %rdx
  37546. pextq %r14, %rax, %rax
  37547. movq %rcx, 352(%rdi)
  37548. sbbq %rax, %rdx
  37549. movq 368(%rsi), %rcx
  37550. movq 368(%r8), %rax
  37551. pextq %r14, %rcx, %rcx
  37552. movq %rdx, 360(%rdi)
  37553. sbbq %rcx, %rax
  37554. movq 376(%rsi), %rdx
  37555. movq 376(%r8), %rcx
  37556. pextq %r14, %rdx, %rdx
  37557. movq %rax, 368(%rdi)
  37558. sbbq %rdx, %rcx
  37559. movq 384(%rsi), %rax
  37560. movq 384(%r8), %rdx
  37561. pextq %r14, %rax, %rax
  37562. movq %rcx, 376(%rdi)
  37563. sbbq %rax, %rdx
  37564. movq 392(%rsi), %rcx
  37565. movq 392(%r8), %rax
  37566. pextq %r14, %rcx, %rcx
  37567. movq %rdx, 384(%rdi)
  37568. sbbq %rcx, %rax
  37569. movq 400(%rsi), %rdx
  37570. movq 400(%r8), %rcx
  37571. pextq %r14, %rdx, %rdx
  37572. movq %rax, 392(%rdi)
  37573. sbbq %rdx, %rcx
  37574. movq 408(%rsi), %rax
  37575. movq 408(%r8), %rdx
  37576. pextq %r14, %rax, %rax
  37577. movq %rcx, 400(%rdi)
  37578. sbbq %rax, %rdx
  37579. movq 416(%rsi), %rcx
  37580. movq 416(%r8), %rax
  37581. pextq %r14, %rcx, %rcx
  37582. movq %rdx, 408(%rdi)
  37583. sbbq %rcx, %rax
  37584. movq 424(%rsi), %rdx
  37585. movq 424(%r8), %rcx
  37586. pextq %r14, %rdx, %rdx
  37587. movq %rax, 416(%rdi)
  37588. sbbq %rdx, %rcx
  37589. movq 432(%rsi), %rax
  37590. movq 432(%r8), %rdx
  37591. pextq %r14, %rax, %rax
  37592. movq %rcx, 424(%rdi)
  37593. sbbq %rax, %rdx
  37594. movq 440(%rsi), %rcx
  37595. movq 440(%r8), %rax
  37596. pextq %r14, %rcx, %rcx
  37597. movq %rdx, 432(%rdi)
  37598. sbbq %rcx, %rax
  37599. movq 448(%rsi), %rdx
  37600. movq 448(%r8), %rcx
  37601. pextq %r14, %rdx, %rdx
  37602. movq %rax, 440(%rdi)
  37603. sbbq %rdx, %rcx
  37604. movq 456(%rsi), %rax
  37605. movq 456(%r8), %rdx
  37606. pextq %r14, %rax, %rax
  37607. movq %rcx, 448(%rdi)
  37608. sbbq %rax, %rdx
  37609. movq 464(%rsi), %rcx
  37610. movq 464(%r8), %rax
  37611. pextq %r14, %rcx, %rcx
  37612. movq %rdx, 456(%rdi)
  37613. sbbq %rcx, %rax
  37614. movq 472(%rsi), %rdx
  37615. movq 472(%r8), %rcx
  37616. pextq %r14, %rdx, %rdx
  37617. movq %rax, 464(%rdi)
  37618. sbbq %rdx, %rcx
  37619. movq 480(%rsi), %rax
  37620. movq 480(%r8), %rdx
  37621. pextq %r14, %rax, %rax
  37622. movq %rcx, 472(%rdi)
  37623. sbbq %rax, %rdx
  37624. movq 488(%rsi), %rcx
  37625. movq 488(%r8), %rax
  37626. pextq %r14, %rcx, %rcx
  37627. movq %rdx, 480(%rdi)
  37628. sbbq %rcx, %rax
  37629. movq 496(%rsi), %rdx
  37630. movq 496(%r8), %rcx
  37631. pextq %r14, %rdx, %rdx
  37632. movq %rax, 488(%rdi)
  37633. sbbq %rdx, %rcx
  37634. movq 504(%rsi), %rax
  37635. movq 504(%r8), %rdx
  37636. pextq %r14, %rax, %rax
  37637. movq %rcx, 496(%rdi)
  37638. sbbq %rax, %rdx /* top word; the borrow out of this subtraction is not used */
  37639. movq %rdx, 504(%rdi)
  37640. popq %r14 /* restore in reverse push order */
  37641. popq %r13
  37642. popq %r12
  37643. repz retq
  37644. #ifndef __APPLE__
  37645. .size sp_4096_mont_reduce_avx2_64,.-sp_4096_mont_reduce_avx2_64
  37646. #endif /* __APPLE__ */
  37647. #endif /* HAVE_INTEL_AVX2 */
  /* Conditional add of two 32-word (2048-bit) numbers: r = a + (b & m).
   *
   * Every word of b is ANDed with m, so m = -1 adds b and m = 0 adds zero.
   * The same instructions execute for either mask value.
   *
   * r (%rdi)  Result, 32 64-bit words.
   * a (%rsi)  Number to add to, 32 words.
   * b (%rdx)  Number to add, 32 words (masked with m first).
   * m (%rcx)  Mask value to apply.
   *
   * Returns the carry out of the most significant word (0 or 1) in %rax.
   * Uses 256 bytes of stack for the masked copy of b.
   * Overwrites %rdx, %r8, %r9 and the flags.
   */
  #ifndef __APPLE__
  .text
  .globl sp_4096_cond_add_32
  .type sp_4096_cond_add_32,@function
  .align 16
  sp_4096_cond_add_32:
  #else
  .section __TEXT,__text
  .globl _sp_4096_cond_add_32
  .p2align 4
  _sp_4096_cond_add_32:
  #endif /* __APPLE__ */
          subq    $0x100, %rsp
          movq    $0x00, %rax
          /* Pass 1: store b & m on the stack, two words per step.  The AND
           * rewrites the carry flag, so the masking is done before the
           * add-with-carry chain starts rather than inside it. */
  .irp off,0,16,32,48,64,80,96,112,128,144,160,176,192,208,224,240
          movq    \off(%rdx), %r8
          movq    \off+8(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, \off(%rsp)
          movq    %r9, \off+8(%rsp)
  .endr
          /* Pass 2: r = a + masked b.  Word 0 starts the carry chain. */
          movq    (%rsi), %r8
          movq    (%rsp), %rdx
          addq    %rdx, %r8
          /* Words 1..30, two per step.  Sums alternate between r9 and r8 and
           * each store is issued one word late. */
  .irp off,8,24,40,56,72,88,104,120,136,152,168,184,200,216,232
          movq    \off(%rsi), %r9
          movq    \off(%rsp), %rdx
          adcq    %rdx, %r9
          movq    %r8, \off-8(%rdi)
          movq    \off+8(%rsi), %r8
          movq    \off+8(%rsp), %rdx
          adcq    %rdx, %r8
          movq    %r9, \off(%rdi)
  .endr
          /* Word 31, then flush the two pending stores. */
          movq    248(%rsi), %r9
          movq    248(%rsp), %rdx
          adcq    %rdx, %r9
          movq    %r8, 240(%rdi)
          movq    %r9, 248(%rdi)
          /* Return the final carry. */
          adcq    $0x00, %rax
          addq    $0x100, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_4096_cond_add_32,.-sp_4096_cond_add_32
  #endif /* __APPLE__ */
  /* Conditional add of two 32-word (2048-bit) numbers using BMI2 PEXT:
   * r = a + (b masked by m).
   *
   * Each word of b is passed through PEXT with m as the selector.  For
   * m = -1 this yields the word unchanged and for m = 0 it yields zero.
   * Any other mask value compacts the selected bits and is not an AND, so
   * m must be exactly 0 or -1.  PEXT leaves the flags alone, which lets
   * the masking sit inside the add-with-carry chain without a stack copy.
   *
   * r (%rdi)  Result, 32 64-bit words.
   * a (%rsi)  Number to add to, 32 words.
   * b (%rdx)  Number to add, 32 words.
   * m (%rcx)  Mask value to apply.
   *
   * Returns the carry out of the most significant word (0 or 1) in %rax.
   * Overwrites %r8, %r9, %r10 and the flags.
   */
  #ifndef __APPLE__
  .text
  .globl sp_4096_cond_add_avx2_32
  .type sp_4096_cond_add_avx2_32,@function
  .align 16
  sp_4096_cond_add_avx2_32:
  #else
  .section __TEXT,__text
  .globl _sp_4096_cond_add_avx2_32
  .p2align 4
  _sp_4096_cond_add_avx2_32:
  #endif /* __APPLE__ */
          movq    $0x00, %rax
          /* Word 0 starts the carry chain. */
          movq    (%rdx), %r10
          movq    (%rsi), %r8
          pextq   %rcx, %r10, %r10
          addq    %r10, %r8
          /* Words 1..30, three per step.  The roles (masked b, sum, pending
           * store) rotate through r10, r9 and r8; each store is one word
           * behind the add that produced it. */
  .irp off,8,32,56,80,104,128,152,176,200,224
          movq    \off(%rdx), %r10
          movq    \off(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, \off-8(%rdi)
          adcq    %r10, %r9
          movq    \off+8(%rdx), %r8
          movq    \off+8(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, \off(%rdi)
          adcq    %r8, %r10
          movq    \off+16(%rdx), %r9
          movq    \off+16(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, \off+8(%rdi)
          adcq    %r9, %r8
  .endr
          /* Word 31, then the last pending store. */
          movq    248(%rdx), %r10
          movq    248(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 240(%rdi)
          adcq    %r10, %r9
          movq    %r9, 248(%rdi)
          /* Return the final carry. */
          adcq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_4096_cond_add_avx2_32,.-sp_4096_cond_add_avx2_32
  #endif /* __APPLE__ */
  /* Shift a 64-word (4096-bit) number left by n bits. (r = a << n)
   *
   * r (%rdi)  Result of left shift by n.  65 words are written: r[64]
   *           receives the bits shifted out of a[63].
   * a (%rsi)  Number to shift, 64 words.
   * n (%dl)   Amount to shift.  The count is applied with SHLD/SHL, which
   *           use only its low 6 bits, so the effective range is 0..63.
   *
   * Words are processed from most to least significant and every load of a
   * word precedes the store that could overwrite it.
   * Overwrites %rax, %rcx, %rdx, %r8, %r9, %r10, %r11 and the flags.
   */
  #ifndef __APPLE__
  .text
  .globl sp_4096_lshift_64
  .type sp_4096_lshift_64,@function
  .align 16
  sp_4096_lshift_64:
  #else
  .section __TEXT,__text
  .globl _sp_4096_lshift_64
  .p2align 4
  _sp_4096_lshift_64:
  #endif /* __APPLE__ */
          movb    %dl, %cl
          movq    $0x00, %r10
          /* Top group: words 60..63 plus the overflow word r[64].  Word 59
           * is loaded into r11 as the source of the bits shifted into
           * word 60, and stays there for the next group. */
          movq    472(%rsi), %r11
          movq    480(%rsi), %rdx
          movq    488(%rsi), %rax
          movq    496(%rsi), %r8
          movq    504(%rsi), %r9
          shldq   %cl, %r9, %r10
          shldq   %cl, %r8, %r9
          shldq   %cl, %rax, %r8
          shldq   %cl, %rdx, %rax
          shldq   %cl, %r11, %rdx
          movq    %rdx, 480(%rdi)
          movq    %rax, 488(%rdi)
          movq    %r8, 496(%rdi)
          movq    %r9, 504(%rdi)
          movq    %r10, 512(%rdi)
          /* Words 59 down to 3, eight per step.  "top" is the byte offset
           * of the highest word of the step; on entry that word is already
           * in r11.  The carried-over word alternates between r11 and r9. */
  .irp top,472,408,344,280,216,152,88
          movq    \top-32(%rsi), %r9
          movq    \top-24(%rsi), %rdx
          movq    \top-16(%rsi), %rax
          movq    \top-8(%rsi), %r8
          shldq   %cl, %r8, %r11
          shldq   %cl, %rax, %r8
          shldq   %cl, %rdx, %rax
          shldq   %cl, %r9, %rdx
          movq    %rdx, \top-24(%rdi)
          movq    %rax, \top-16(%rdi)
          movq    %r8, \top-8(%rdi)
          movq    %r11, \top(%rdi)
          movq    \top-64(%rsi), %r11
          movq    \top-56(%rsi), %rdx
          movq    \top-48(%rsi), %rax
          movq    \top-40(%rsi), %r8
          shldq   %cl, %r8, %r9
          shldq   %cl, %rax, %r8
          shldq   %cl, %rdx, %rax
          shldq   %cl, %r11, %rdx
          movq    %rdx, \top-56(%rdi)
          movq    %rax, \top-48(%rdi)
          movq    %r8, \top-40(%rdi)
          movq    %r9, \top-32(%rdi)
  .endr
          /* Bottom group: words 0..3.  Word 0 has no lower neighbour, so it
           * is shifted with a plain SHL that fills with zeros. */
          movq    (%rsi), %rdx
          movq    8(%rsi), %rax
          movq    16(%rsi), %r8
          shldq   %cl, %r8, %r11
          shldq   %cl, %rax, %r8
          shldq   %cl, %rdx, %rax
          shlq    %cl, %rdx
          movq    %rdx, (%rdi)
          movq    %rax, 8(%rdi)
          movq    %r8, 16(%rdi)
          movq    %r11, 24(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_4096_lshift_64,.-sp_4096_lshift_64
  #endif /* __APPLE__ */
  38301. #endif /* WOLFSSL_SP_4096 */
  38302. #endif /* WOLFSSL_SP_4096 */
  38303. #ifndef WOLFSSL_SP_NO_256
  /* Conditional copy of a 4-word (256-bit) number: r = m ? a : r.
   *
   * Implemented without branches as r ^= (r ^ a) & m on each word, so
   * m = -1 replaces r with a and m = 0 leaves r as it was.
   *
   * r (%rdi)  Number to copy over, 4 words.
   * a (%rsi)  Number to copy, 4 words.
   * m (%rdx)  Mask value to apply.
   *
   * Overwrites %rax, %rcx, %r8, %r9 and the flags.
   */
  #ifndef __APPLE__
  .text
  .globl sp_256_cond_copy_4
  .type sp_256_cond_copy_4,@function
  .align 16
  sp_256_cond_copy_4:
  #else
  .section __TEXT,__text
  .globl _sp_256_cond_copy_4
  .p2align 4
  _sp_256_cond_copy_4:
  #endif /* __APPLE__ */
          /* diff = a ^ r, all four words read before anything is written */
          movq    (%rsi), %rax
          movq    8(%rsi), %rcx
          movq    16(%rsi), %r8
          movq    24(%rsi), %r9
          xorq    (%rdi), %rax
          xorq    8(%rdi), %rcx
          xorq    16(%rdi), %r8
          xorq    24(%rdi), %r9
          /* keep the difference only when the mask is set */
          andq    %rdx, %rax
          andq    %rdx, %rcx
          andq    %rdx, %r8
          andq    %rdx, %r9
          /* r ^= diff turns r into a where the mask is set */
          xorq    %rax, (%rdi)
          xorq    %rcx, 8(%rdi)
          xorq    %r8, 16(%rdi)
          xorq    %r9, 24(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_256_cond_copy_4,.-sp_256_cond_copy_4
  #endif /* __APPLE__ */
  /* Multiply two Montgomery form numbers mod the P-256 prime.
   * (r = a * b * 2^-256 mod p)
   *
   * The reduction is specialised for
   *   p = 2^256 - 2^224 + 2^192 + 2^96 - 1
   * and uses hard-coded words of p, so the m and mp arguments are not read.
   *
   * r  (%rdi)  Result of multiplication, 4 words.
   * a  (%rsi)  First number to multiply in Montgomery form.
   * b  (%rdx)  Second number to multiply in Montgomery form.
   * m  (%rcx)  Modulus (prime).  Not read; the register is used as scratch.
   * mp (%r8)   Montgomery multiplier.  Not read; the register is reused.
   *
   * Overwrites %rax, %rcx, %rdx, %rsi, %r8-%r11 and the flags.
   * %r12-%r15 and %rbx are saved and restored.
   */
  /* lo:hi += b[bo / 8] * a[ao / 8] */
  .macro sp256_mm4_mac2 bo, ao, lo, hi
          movq    \bo(%r8), %rax
          mulq    \ao(%rsi)
          addq    %rax, \lo
          adcq    %rdx, \hi
  .endm
  /* lo:mid:hi += b[bo / 8] * a[ao / 8] */
  .macro sp256_mm4_mac3 bo, ao, lo, mid, hi
          movq    \bo(%r8), %rax
          mulq    \ao(%rsi)
          addq    %rax, \lo
          adcq    %rdx, \mid
          adcq    $0x00, \hi
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_256_mont_mul_4
  .type sp_256_mont_mul_4,@function
  .align 16
  sp_256_mont_mul_4:
  #else
  .section __TEXT,__text
  .globl _sp_256_mont_mul_4
  .p2align 4
  _sp_256_mont_mul_4:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          pushq   %rbx
          /* MUL writes rdx, so b is kept in r8. */
          movq    %rdx, %r8
          /* 512-bit product t[0..7] in r9, r10, r11, r12, r13, r14, r15, rbx,
           * built one column at a time. */
          /* Column 0: A[0] * B[0] */
          movq    (%r8), %rax
          mulq    (%rsi)
          movq    %rax, %r9
          movq    %rdx, %r10
          /* Column 1: A[0] * B[1], A[1] * B[0] */
          xorq    %r11, %r11
          sp256_mm4_mac2 8, 0, %r10, %r11
          xorq    %r12, %r12
          sp256_mm4_mac3 0, 8, %r10, %r11, %r12
          /* Column 2: A[0] * B[2], A[1] * B[1], A[2] * B[0] */
          sp256_mm4_mac2 16, 0, %r11, %r12
          xorq    %r13, %r13
          sp256_mm4_mac3 8, 8, %r11, %r12, %r13
          sp256_mm4_mac3 0, 16, %r11, %r12, %r13
          /* Column 3: A[0] * B[3], A[1] * B[2], A[2] * B[1], A[3] * B[0] */
          xorq    %r14, %r14
          sp256_mm4_mac3 24, 0, %r12, %r13, %r14
          sp256_mm4_mac3 16, 8, %r12, %r13, %r14
          sp256_mm4_mac3 8, 16, %r12, %r13, %r14
          sp256_mm4_mac3 0, 24, %r12, %r13, %r14
          /* Column 4: A[1] * B[3], A[2] * B[2], A[3] * B[1] */
          xorq    %r15, %r15
          sp256_mm4_mac3 24, 8, %r13, %r14, %r15
          sp256_mm4_mac3 16, 16, %r13, %r14, %r15
          sp256_mm4_mac3 8, 24, %r13, %r14, %r15
          /* Column 5: A[2] * B[3], A[3] * B[2] */
          xorq    %rbx, %rbx
          sp256_mm4_mac3 24, 16, %r14, %r15, %rbx
          sp256_mm4_mac3 16, 24, %r14, %r15, %rbx
          /* Column 6: A[3] * B[3] */
          sp256_mm4_mac2 24, 24, %r15, %rbx
          /* Start Reduction */
          /* mu = t[0..3] + ((t[0..2] << 32) << 64) + ((t[0] * 2) << 192)
           *      - ((t[0] << 32) << 192), kept modulo 2^256
           * mu is held in rax, rsi, r8, rdx (least significant first). */
          movq    %r9, %rax
          movq    %r10, %rsi
          movq    %r11, %r8
          /*   mu[3] = t[3] + t[0] * 2 */
          movq    %r12, %rdx
          addq    %r9, %rdx
          addq    %r9, %rdx
          /*   r9, r10, r11 = low 192 bits of t[0..2] << 32 */
          shlq    $32, %r9
          shldq   $32, %rsi, %r11
          shldq   $32, %rax, %r10
          /*   - (t[0] << 32) << 192 */
          subq    %r9, %rdx
          /*   + (t[0..2] << 32) << 64 */
          addq    %r9, %rsi
          adcq    %r10, %r8
          adcq    %r11, %rdx
          /* t += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
           * Only limbs 3..7 are tracked.  r9 collects minus the number of
           * carries out of limb 7. */
          /*   t += mu << 256 */
          xorq    %r9, %r9
          addq    %rax, %r13
          adcq    %rsi, %r14
          adcq    %r8, %r15
          adcq    %rdx, %rbx
          sbbq    $0x00, %r9
          /*   t += mu << 192 */
          addq    %rax, %r12
          adcq    %rsi, %r13
          adcq    %r8, %r14
          adcq    %rdx, %r15
          adcq    $0x00, %rbx
          sbbq    $0x00, %r9
          /* Keep mu[3] for the limb 3 carry correction below.  r10 is free
           * once mu has been formed. */
          movq    %rdx, %r10
          /* mu <<= 32, giving five limbs in rax, rsi, r8, rdx, rcx */
          movq    %rdx, %rcx
          shldq   $32, %r8, %rdx
          shldq   $32, %rsi, %r8
          shldq   $32, %rax, %rsi
          shrq    $32, %rcx
          shlq    $32, %rax
          /*   t += (mu << 32) << 64, the part that lands in limbs 3..5 */
          addq    %r8, %r12
          adcq    %rdx, %r13
          adcq    %rcx, %r14
          adcq    $0x00, %r15
          adcq    $0x00, %rbx
          sbbq    $0x00, %r9
          /*   t -= (mu << 32) << 192 */
          subq    %rax, %r12
          sbbq    %rsi, %r13
          sbbq    %r8, %r14
          sbbq    %rdx, %r15
          sbbq    %rcx, %rbx
          adcq    $0x00, %r9
          /* Carry correction for the terms left out above.  The "- mu" term
           * and the carry c (0 or 1) out of limbs 0..2 are not applied, so
           * limb 3 now holds mu[3] - c mod 2^64, whereas the exact sum has
           * limb 3 equal to zero.  The carry into limb 4 is already right
           * unless mu[3] == 0 and c == 1: then limb 3 wrapped to all ones
           * and one borrow too many was taken from limb 4.  Detect exactly
           * that case without branching and give the borrow back. */
          notq    %r12
          orq     %r10, %r12
          /*   carry set only when limb 3 was all ones and mu[3] was zero */
          subq    $0x01, %r12
          adcq    $0x00, %r13
          adcq    $0x00, %r14
          adcq    $0x00, %r15
          adcq    $0x00, %rbx
          sbbq    $0x00, %r9
          /* r9 is the overflow mask (0 or -1): subtract p when it is set.
           * Words of p: p[0] = -1, p[1] = 0x00000000ffffffff, p[2] = 0,
           * p[3] = 0xffffffff00000001. */
          movq    $0xffffffff, %rax
          movq    $0xffffffff00000001, %rsi
          /*   p[1] & mask */
          andq    %r9, %rax
          /*   p[3] & mask */
          andq    %r9, %rsi
          /*   p[0] & mask is the mask itself, p[2] & mask is 0 */
          subq    %r9, %r13
          sbbq    %rax, %r14
          sbbq    $0x00, %r15
          sbbq    %rsi, %rbx
          movq    %r13, (%rdi)
          movq    %r14, 8(%rdi)
          movq    %r15, 16(%rdi)
          movq    %rbx, 24(%rdi)
          popq    %rbx
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size sp_256_mont_mul_4,.-sp_256_mont_mul_4
  #endif /* __APPLE__ */
  /* Square a Montgomery form number mod the P-256 prime.
   * (r = a * a * 2^-256 mod p)
   *
   * The reduction is specialised for
   *   p = 2^256 - 2^224 + 2^192 + 2^96 - 1
   * and uses hard-coded words of p, so the m and mp arguments are not read.
   *
   * r  (%rdi)  Result of squaring, 4 words.
   * a  (%rsi)  Number to square in Montgomery form.
   * m  (%rdx)  Modulus (prime).  Not read; the register is overwritten.
   * mp (%rcx)  Montgomery multiplier.  Not read; the register is scratch.
   *
   * Overwrites %rax, %rcx, %rdx, %rsi, %r8-%r11 and the flags.
   * %r12-%r15 and %rbx are saved and restored.
   */
  /* lo:hi += a[i / 8] * a[j / 8] */
  .macro sp256_ms4_mac2 i, j, lo, hi
          movq    \i(%rsi), %rax
          mulq    \j(%rsi)
          addq    %rax, \lo
          adcq    %rdx, \hi
  .endm
  /* lo:mid:hi += a[i / 8] * a[j / 8] */
  .macro sp256_ms4_mac3 i, j, lo, mid, hi
          movq    \i(%rsi), %rax
          mulq    \j(%rsi)
          addq    %rax, \lo
          adcq    %rdx, \mid
          adcq    $0x00, \hi
  .endm
  /* rdx:rax = a[i / 8] squared; lo += rbx, hi += rax + carry.
   * The carry out of hi is left in the carry flag for the caller. */
  .macro sp256_ms4_diag i, lo, hi
          movq    \i(%rsi), %rax
          mulq    %rax
          addq    %rbx, \lo
          adcq    %rax, \hi
  .endm
  #ifndef __APPLE__
  .text
  .globl sp_256_mont_sqr_4
  .type sp_256_mont_sqr_4,@function
  .align 16
  sp_256_mont_sqr_4:
  #else
  .section __TEXT,__text
  .globl _sp_256_mont_sqr_4
  .p2align 4
  _sp_256_mont_sqr_4:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          pushq   %rbx
          /* Off-diagonal products, accumulated into t[1..6] held in
           * r9, r10, r11, r12, r13, r14. */
          /* A[0] * A[1] */
          movq    (%rsi), %rax
          mulq    8(%rsi)
          movq    %rax, %r9
          movq    %rdx, %r10
          /* A[0] * A[2] */
          xorq    %r11, %r11
          sp256_ms4_mac2 0, 16, %r10, %r11
          /* A[0] * A[3] */
          xorq    %r12, %r12
          sp256_ms4_mac2 0, 24, %r11, %r12
          /* A[1] * A[2] */
          xorq    %r13, %r13
          sp256_ms4_mac3 8, 16, %r11, %r12, %r13
          /* A[1] * A[3] */
          sp256_ms4_mac2 8, 24, %r12, %r13
          /* A[2] * A[3] */
          xorq    %r14, %r14
          sp256_ms4_mac2 16, 24, %r13, %r14
          /* Double: every off-diagonal product occurs twice in the square.
           * The bit shifted out of t[6] becomes t[7] in r15. */
          xorq    %r15, %r15
          addq    %r9, %r9
          adcq    %r10, %r10
          adcq    %r11, %r11
          adcq    %r12, %r12
          adcq    %r13, %r13
          adcq    %r14, %r14
          adcq    $0x00, %r15
          /* Add the diagonal squares.  rbx carries the high word of each
           * square into the next pair of limbs. */
          /* A[0] * A[0] */
          movq    (%rsi), %rax
          mulq    %rax
          movq    %rax, %r8
          movq    %rdx, %rbx
          /* A[1] * A[1] */
          sp256_ms4_diag 8, %r9, %r10
          adcq    $0x00, %rdx
          movq    %rdx, %rbx
          /* A[2] * A[2] */
          sp256_ms4_diag 16, %r11, %r12
          adcq    $0x00, %rdx
          movq    %rdx, %rbx
          /* A[3] * A[3] */
          sp256_ms4_diag 24, %r13, %r14
          adcq    %rdx, %r15
          /* The product t[0..7] is now in r8, r9, r10, r11, r12, r13, r14, r15. */
          /* Start Reduction */
          /* mu = t[0..3] + ((t[0..2] << 32) << 64) + ((t[0] * 2) << 192)
           *      - ((t[0] << 32) << 192), kept modulo 2^256
           * mu is held in rax, rsi, rbx, rdx (least significant first). */
          movq    %r8, %rax
          movq    %r9, %rsi
          movq    %r10, %rbx
          /*   mu[3] = t[3] + t[0] * 2 */
          movq    %r11, %rdx
          addq    %r8, %rdx
          addq    %r8, %rdx
          /*   r8, r9, r10 = low 192 bits of t[0..2] << 32 */
          shlq    $32, %r8
          shldq   $32, %rsi, %r10
          shldq   $32, %rax, %r9
          /*   - (t[0] << 32) << 192 */
          subq    %r8, %rdx
          /*   + (t[0..2] << 32) << 64 */
          addq    %r8, %rsi
          adcq    %r9, %rbx
          adcq    %r10, %rdx
          /* t += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
           * Only limbs 3..7 are tracked.  r8 collects minus the number of
           * carries out of limb 7. */
          /*   t += mu << 256 */
          xorq    %r8, %r8
          addq    %rax, %r12
          adcq    %rsi, %r13
          adcq    %rbx, %r14
          adcq    %rdx, %r15
          sbbq    $0x00, %r8
          /*   t += mu << 192 */
          addq    %rax, %r11
          adcq    %rsi, %r12
          adcq    %rbx, %r13
          adcq    %rdx, %r14
          adcq    $0x00, %r15
          sbbq    $0x00, %r8
          /* Keep mu[3] for the limb 3 carry correction below.  r9 is free
           * once mu has been formed. */
          movq    %rdx, %r9
          /* mu <<= 32, giving five limbs in rax, rsi, rbx, rdx, rcx */
          movq    %rdx, %rcx
          shldq   $32, %rbx, %rdx
          shldq   $32, %rsi, %rbx
          shldq   $32, %rax, %rsi
          shrq    $32, %rcx
          shlq    $32, %rax
          /*   t += (mu << 32) << 64, the part that lands in limbs 3..5 */
          addq    %rbx, %r11
          adcq    %rdx, %r12
          adcq    %rcx, %r13
          adcq    $0x00, %r14
          adcq    $0x00, %r15
          sbbq    $0x00, %r8
          /*   t -= (mu << 32) << 192 */
          subq    %rax, %r11
          sbbq    %rsi, %r12
          sbbq    %rbx, %r13
          sbbq    %rdx, %r14
          sbbq    %rcx, %r15
          adcq    $0x00, %r8
          /* Carry correction for the terms left out above.  The "- mu" term
           * and the carry c (0 or 1) out of limbs 0..2 are not applied, so
           * limb 3 now holds mu[3] - c mod 2^64, whereas the exact sum has
           * limb 3 equal to zero.  The carry into limb 4 is already right
           * unless mu[3] == 0 and c == 1: then limb 3 wrapped to all ones
           * and one borrow too many was taken from limb 4.  Detect exactly
           * that case without branching and give the borrow back. */
          notq    %r11
          orq     %r9, %r11
          /*   carry set only when limb 3 was all ones and mu[3] was zero */
          subq    $0x01, %r11
          adcq    $0x00, %r12
          adcq    $0x00, %r13
          adcq    $0x00, %r14
          adcq    $0x00, %r15
          sbbq    $0x00, %r8
          /* r8 is the overflow mask (0 or -1): subtract p when it is set.
           * Words of p: p[0] = -1, p[1] = 0x00000000ffffffff, p[2] = 0,
           * p[3] = 0xffffffff00000001. */
          movq    $0xffffffff, %rax
          movq    $0xffffffff00000001, %rsi
          /*   p[1] & mask */
          andq    %r8, %rax
          /*   p[3] & mask */
          andq    %r8, %rsi
          /*   p[0] & mask is the mask itself, p[2] & mask is 0 */
          subq    %r8, %r12
          sbbq    %rax, %r13
          sbbq    $0x00, %r14
          sbbq    %rsi, %r15
          movq    %r12, (%rdi)
          movq    %r13, 8(%rdi)
          movq    %r14, 16(%rdi)
          movq    %r15, 24(%rdi)
          popq    %rbx
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size sp_256_mont_sqr_4,.-sp_256_mont_sqr_4
  #endif /* __APPLE__ */
  38729. /* Branch-free three-way comparison of two 4-limb (256-bit) integers.
  38730. *
  38731. * a First operand: four 64-bit limbs, least significant limb first (rdi).
  38732. * b Second operand, same layout (rsi).
  38733. * return -1, 0 or +1 (in rax) if a is less than, equal to or greater than b
  38734. * respectively. All four limb pairs are always loaded and compared.
  38735. */
  38736. #ifndef __APPLE__
  38737. .text
  38738. .globl sp_256_cmp_4
  38739. .type sp_256_cmp_4,@function
  38740. .align 16
  38741. sp_256_cmp_4:
  38742. #else
  38743. .section __TEXT,__text
  38744. .globl _sp_256_cmp_4
  38745. .p2align 4
  38746. _sp_256_cmp_4:
  38747. #endif /* __APPLE__ */
  38748. movq $0x01, %r9 # r9 = +1, selected when a limb of a is above b
  38749. movq $-1, %rax # rax = provisional result, fixed up by the final xor
  38750. movq $-1, %rcx # rcx = mask, all ones while every higher limb was equal
  38751. xorq %r8, %r8 # r8 = 0, used to clear the mask
  38752. movq 24(%rsi), %r11 # limb 3 (most significant) of b
  38753. movq 24(%rdi), %r10 # limb 3 of a
  38754. andq %rcx, %r11
  38755. andq %rcx, %r10
  38756. cmpq %r11, %r10 # flags of a[3] - b[3], operands untouched
  38757. cmovc %rcx, %rax # below: result = mask, which is still -1 here
  38758. cmova %r9, %rax # above: result = +1
  38759. cmovnz %r8, %rcx # different: zero the mask so lower limbs compare equal
  38760. movq 16(%rsi), %r11 # limb 2
  38761. movq 16(%rdi), %r10
  38762. andq %rcx, %r11
  38763. andq %rcx, %r10
  38764. cmpq %r11, %r10
  38765. cmovc %rcx, %rax
  38766. cmova %r9, %rax
  38767. cmovnz %r8, %rcx
  38768. movq 8(%rsi), %r11 # limb 1
  38769. movq 8(%rdi), %r10
  38770. andq %rcx, %r11
  38771. andq %rcx, %r10
  38772. cmpq %r11, %r10
  38773. cmovc %rcx, %rax
  38774. cmova %r9, %rax
  38775. cmovnz %r8, %rcx
  38776. movq (%rsi), %r11 # limb 0 (least significant)
  38777. movq (%rdi), %r10
  38778. andq %rcx, %r11
  38779. andq %rcx, %r10
  38780. cmpq %r11, %r10
  38781. cmovc %rcx, %rax
  38782. cmova %r9, %rax
  38783. cmovnz %r8, %rcx
  38784. xorq %rcx, %rax # all equal: mask is still -1 and rax is -1, giving 0
  38785. repz retq
  38786. #ifndef __APPLE__
  38787. .size sp_256_cmp_4,.-sp_256_cmp_4
  38788. #endif /* __APPLE__ */
  38789. /* Masked subtraction of 4-limb integers: r = a - (b & m).
  38790. * m is -1 to subtract b and 0 to leave a unchanged (a is still copied to r).
  38791. *
  38792. * r Result of the conditional subtraction, 4 limbs (rdi).
  38793. * a Number to subtract from, 4 limbs (rsi).
  38794. * b Number to subtract, 4 limbs (rdx).
  38795. * m Mask ANDed with every limb of b (rcx). Returns 0 or -1 (borrow) in rax.
  38796. */
  38797. #ifndef __APPLE__
  38798. .text
  38799. .globl sp_256_cond_sub_4
  38800. .type sp_256_cond_sub_4,@function
  38801. .align 16
  38802. sp_256_cond_sub_4:
  38803. #else
  38804. .section __TEXT,__text
  38805. .globl _sp_256_cond_sub_4
  38806. .p2align 4
  38807. _sp_256_cond_sub_4:
  38808. #endif /* __APPLE__ */
  38809. pushq %r12 # r12-r15 are callee-saved; they hold the limbs of a
  38810. pushq %r13
  38811. pushq %r14
  38812. pushq %r15
  38813. movq (%rsi), %r12
  38814. movq 8(%rsi), %r13
  38815. movq 16(%rsi), %r14
  38816. movq 24(%rsi), %r15
  38817. movq (%rdx), %r8 # r8-r11 = limbs of b, masked as they are loaded
  38818. andq %rcx, %r8
  38819. movq 8(%rdx), %r9
  38820. andq %rcx, %r9
  38821. movq 16(%rdx), %r10
  38822. andq %rcx, %r10
  38823. movq 24(%rdx), %r11
  38824. andq %rcx, %r11
  38825. movq $0x00, %rax # movq keeps the flags clean for the borrow chain
  38826. subq %r8, %r12
  38827. sbbq %r9, %r13
  38828. sbbq %r10, %r14
  38829. sbbq %r11, %r15
  38830. sbbq $0x00, %rax # rax = 0 - borrow: 0 or -1
  38831. movq %r12, (%rdi) # every load is complete before the first store
  38832. movq %r13, 8(%rdi)
  38833. movq %r14, 16(%rdi)
  38834. movq %r15, 24(%rdi)
  38835. popq %r15
  38836. popq %r14
  38837. popq %r13
  38838. popq %r12
  38839. repz retq
  38840. #ifndef __APPLE__
  38841. .size sp_256_cond_sub_4,.-sp_256_cond_sub_4
  38842. #endif /* __APPLE__ */
  38843. /* Subtract 4-limb integers: r = a - b. Returns 0 or -1 (borrow) in rax.
  38844. *
  38845. * r Result, 4 limbs (rdi).
  38846. * a Number to subtract from, 4 limbs (rsi).
  38847. * b Number to subtract, 4 limbs (rdx).
  38848. */
  38849. #ifndef __APPLE__
  38850. .text
  38851. .globl sp_256_sub_4
  38852. .type sp_256_sub_4,@function
  38853. .align 16
  38854. sp_256_sub_4:
  38855. #else
  38856. .section __TEXT,__text
  38857. .globl _sp_256_sub_4
  38858. .p2align 4
  38859. _sp_256_sub_4:
  38860. #endif /* __APPLE__ */
  38861. movq (%rsi), %r8 # r8-r11 = limbs of a
  38862. movq 8(%rsi), %r9
  38863. movq 16(%rsi), %r10
  38864. movq 24(%rsi), %r11
  38865. xorq %rax, %rax # must come before subq because xor rewrites the flags
  38866. subq (%rdx), %r8
  38867. sbbq 8(%rdx), %r9
  38868. sbbq 16(%rdx), %r10
  38869. sbbq 24(%rdx), %r11
  38870. sbbq $0x00, %rax # rax = 0 - borrow: 0 or -1
  38871. movq %r8, (%rdi) # every load is complete before the first store
  38872. movq %r9, 8(%rdi)
  38873. movq %r10, 16(%rdi)
  38874. movq %r11, 24(%rdi)
  38875. repz retq
  38876. #ifndef __APPLE__
  38877. .size sp_256_sub_4,.-sp_256_sub_4
  38878. #endif /* __APPLE__ */
  38879. /* Reduce the number back to 256 bits using Montgomery reduction.
  38880. *
  38881. * a Number to reduce in place (rdi). Limbs a[0]..a[7] are read and updated; the 4-limb result is stored to a[0]..a[3].
  38882. * m The single precision number representing the modulus (rsi, 4 limbs).
  38883. * mp The digit representing the negative inverse of m mod 2^n (rdx).
  38884. */
  38885. #ifndef __APPLE__
  38886. .text
  38887. .globl sp_256_mont_reduce_4
  38888. .type sp_256_mont_reduce_4,@function
  38889. .align 16
  38890. sp_256_mont_reduce_4:
  38891. #else
  38892. .section __TEXT,__text
  38893. .globl _sp_256_mont_reduce_4
  38894. .p2align 4
  38895. _sp_256_mont_reduce_4:
  38896. #endif /* __APPLE__ */
  38897. pushq %r12 # r12-r15 are callee-saved and used as scratch below
  38898. pushq %r13
  38899. pushq %r14
  38900. pushq %r15
  38901. movq %rdx, %rcx # keep mp in rcx because every mulq overwrites rdx
  38902. # i = 0
  38903. xorq %r14, %r14 # r14 = carry handed from one outer iteration to the next
  38904. movq $4, %r8 # r8 = outer iterations remaining
  38905. movq %rdi, %r13 # r13 = address of a[i]
  38906. L_mont_loop_4:
  38907. # mu = a[i] * mp
  38908. movq (%r13), %r12
  38909. imulq %rcx, %r12 # two-operand imul keeps only the low 64 bits
  38910. # a[i+0] += m[0] * mu
  38911. movq (%rsi), %rax
  38912. movq 8(%rsi), %r10 # preload m[1] for the next step
  38913. mulq %r12 # rdx:rax = m[0] * mu
  38914. movq (%r13), %r15
  38915. addq %rax, %r15
  38916. movq %rdx, %r9 # movq leaves CF alone, so the adcq below still sees the addq carry
  38917. movq %r15, (%r13)
  38918. adcq $0x00, %r9 # r9 = carry word into a[i+1]
  38919. # a[i+1] += m[1] * mu
  38920. movq %r10, %rax
  38921. mulq %r12
  38922. movq 16(%rsi), %r10 # preload m[2]
  38923. movq 8(%r13), %r15
  38924. addq %r9, %rax # fold in the carry word from the previous limb
  38925. movq %rdx, %r11
  38926. adcq $0x00, %r11
  38927. addq %rax, %r15
  38928. movq %r15, 8(%r13)
  38929. adcq $0x00, %r11 # r11 = carry word into a[i+2]
  38930. # a[i+2] += m[2] * mu
  38931. movq %r10, %rax
  38932. mulq %r12
  38933. movq 24(%rsi), %r10 # preload m[3]
  38934. movq 16(%r13), %r15
  38935. addq %r11, %rax
  38936. movq %rdx, %r9
  38937. adcq $0x00, %r9
  38938. addq %rax, %r15
  38939. movq %r15, 16(%r13)
  38940. adcq $0x00, %r9 # r9 = carry word into a[i+3]
  38941. # a[i+3] += m[3] * mu
  38942. movq %r10, %rax
  38943. mulq %r12
  38944. movq 24(%r13), %r15
  38945. addq %r9, %rax
  38946. adcq %r14, %rdx # high word also absorbs the carry left by the previous iteration
  38947. movq $0x00, %r14 # movq, not xor, so the carry of the adcq above survives
  38948. adcq $0x00, %r14
  38949. addq %rax, %r15
  38950. movq %r15, 24(%r13)
  38951. adcq %rdx, 32(%r13) # a[i+4] += high word + carry
  38952. adcq $0x00, %r14 # r14 = carry out of a[i+4], used by the next iteration or the final mask
  38953. # i += 1
  38954. addq $8, %r13
  38955. decq %r8
  38956. jnz L_mont_loop_4
  38957. xorq %rax, %rax
  38958. movq 32(%rdi), %rdx # upper half a[4]..a[7] is the value that gets stored
  38959. movq 40(%rdi), %r8
  38960. movq 48(%rdi), %r15
  38961. movq 56(%rdi), %r9
  38962. subq %r14, %rax # rax = 0 - final carry: mask selecting whether m is subtracted
  38963. movq (%rsi), %r10
  38964. movq 8(%rsi), %r11
  38965. movq 16(%rsi), %r12
  38966. movq 24(%rsi), %r13
  38967. andq %rax, %r10 # m & mask: subtraction is masked, not branched on
  38968. andq %rax, %r11
  38969. andq %rax, %r12
  38970. andq %rax, %r13
  38971. subq %r10, %rdx
  38972. sbbq %r11, %r8
  38973. sbbq %r12, %r15
  38974. sbbq %r13, %r9
  38975. movq %rdx, (%rdi) # result goes to a[0]..a[3]
  38976. movq %r8, 8(%rdi)
  38977. movq %r15, 16(%rdi)
  38978. movq %r9, 24(%rdi)
  38979. popq %r15
  38980. popq %r14
  38981. popq %r13
  38982. popq %r12
  38983. repz retq
  38984. #ifndef __APPLE__
  38985. .size sp_256_mont_reduce_4,.-sp_256_mont_reduce_4
  38986. #endif /* __APPLE__ */
  38987. /* Add two Montgomery form numbers: r = a + b, less the modulus on carry out.
  38988. *
  38989. * r Result of addition, 4 limbs (rdi).
  38990. * a First number to add in Montgomery form, 4 limbs (rsi).
  38991. * b Second number to add in Montgomery form, 4 limbs (rdx).
  38992. * m Modulus (prime). Not read: limbs -1, 0xffffffff, 0, 0xffffffff00000001 are built in.
  38993. */
  38994. #ifndef __APPLE__
  38995. .text
  38996. .globl sp_256_mont_add_4
  38997. .type sp_256_mont_add_4,@function
  38998. .align 16
  38999. sp_256_mont_add_4:
  39000. #else
  39001. .section __TEXT,__text
  39002. .globl _sp_256_mont_add_4
  39003. .p2align 4
  39004. _sp_256_mont_add_4:
  39005. #endif /* __APPLE__ */
  39006. movq $0xffffffff, %rax # limb 1 of the built-in modulus
  39007. movq $0xffffffff00000001, %rcx # limb 3 of the built-in modulus
  39008. movq (%rsi), %r8 # r8-r11 = limbs of a
  39009. movq 8(%rsi), %r9
  39010. movq 16(%rsi), %r10
  39011. movq 24(%rsi), %r11
  39012. movq $0x00, %rsi # a is fully loaded; rsi becomes the carry mask
  39013. addq (%rdx), %r8
  39014. adcq 8(%rdx), %r9
  39015. adcq 16(%rdx), %r10
  39016. adcq 24(%rdx), %r11
  39017. sbbq $0x00, %rsi # rsi = -1 if the 256-bit sum carried out, else 0
  39018. andq %rsi, %rax
  39019. andq %rsi, %rcx
  39020. subq %rsi, %r8 # limb 0 of the modulus is -1, so the mask itself is subtracted
  39021. sbbq %rax, %r9
  39022. sbbq $0x00, %r10 # limb 2 of the modulus is 0
  39023. sbbq %rcx, %r11
  39024. movq %r8, (%rdi)
  39025. movq %r9, 8(%rdi)
  39026. movq %r10, 16(%rdi)
  39027. movq %r11, 24(%rdi)
  39028. repz retq
  39029. #ifndef __APPLE__
  39030. .size sp_256_mont_add_4,.-sp_256_mont_add_4
  39031. #endif /* __APPLE__ */
  39032. /* Double a Montgomery form number: r = a + a, less the modulus on carry out.
  39033. *
  39034. * r Result of doubling, 4 limbs (rdi).
  39035. * a Number to double in Montgomery form, 4 limbs (rsi).
  39036. * m Modulus (prime). Not read: limbs -1, 0xffffffff, 0, 0xffffffff00000001 are built in.
  39037. */
  39038. #ifndef __APPLE__
  39039. .text
  39040. .globl sp_256_mont_dbl_4
  39041. .type sp_256_mont_dbl_4,@function
  39042. .align 16
  39043. sp_256_mont_dbl_4:
  39044. #else
  39045. .section __TEXT,__text
  39046. .globl _sp_256_mont_dbl_4
  39047. .p2align 4
  39048. _sp_256_mont_dbl_4:
  39049. #endif /* __APPLE__ */
  39050. movq $0xffffffff, %rax # limb 1 of the built-in modulus
  39051. movq $0xffffffff00000001, %rcx # limb 3 of the built-in modulus
  39052. movq (%rsi), %r8 # r8-r11 = limbs of a
  39053. movq 8(%rsi), %r9
  39054. movq 16(%rsi), %r10
  39055. movq 24(%rsi), %r11
  39056. movq $0x00, %rdx # rdx becomes the carry mask
  39057. addq %r8, %r8
  39058. adcq %r9, %r9
  39059. adcq %r10, %r10
  39060. adcq %r11, %r11
  39061. sbbq $0x00, %rdx # rdx = -1 if the doubling carried out, else 0
  39062. andq %rdx, %rax
  39063. andq %rdx, %rcx
  39064. subq %rdx, %r8 # limb 0 of the modulus is -1, so the mask itself is subtracted
  39065. sbbq %rax, %r9
  39066. sbbq $0x00, %r10 # limb 2 of the modulus is 0
  39067. sbbq %rcx, %r11
  39068. movq %r8, (%rdi)
  39069. movq %r9, 8(%rdi)
  39070. movq %r10, 16(%rdi)
  39071. movq %r11, 24(%rdi)
  39072. repz retq
  39073. #ifndef __APPLE__
  39074. .size sp_256_mont_dbl_4,.-sp_256_mont_dbl_4
  39075. #endif /* __APPLE__ */
  39076. /* Triple a Montgomery form number: a doubling step then an add of a, each less the modulus on carry out.
  39077. *
  39078. * r Result of tripling, 4 limbs (rdi).
  39079. * a Number to triple in Montgomery form, 4 limbs (rsi).
  39080. * m Modulus (prime). Not read: limbs -1, 0xffffffff, 0, 0xffffffff00000001 are built in.
  39081. */
  39082. #ifndef __APPLE__
  39083. .text
  39084. .globl sp_256_mont_tpl_4
  39085. .type sp_256_mont_tpl_4,@function
  39086. .align 16
  39087. sp_256_mont_tpl_4:
  39088. #else
  39089. .section __TEXT,__text
  39090. .globl _sp_256_mont_tpl_4
  39091. .p2align 4
  39092. _sp_256_mont_tpl_4:
  39093. #endif /* __APPLE__ */
  39094. movq (%rsi), %r8 # r8-r11 = running value, starts as a
  39095. movq 8(%rsi), %r9
  39096. movq 16(%rsi), %r10
  39097. movq 24(%rsi), %r11
  39098. movq $0xffffffff, %rax # limb 1 of the built-in modulus
  39099. movq $0xffffffff00000001, %rcx # limb 3 of the built-in modulus
  39100. movq $0x00, %rdx # rdx becomes the carry mask
  39101. addq %r8, %r8 # step 1: value = a + a
  39102. adcq %r9, %r9
  39103. adcq %r10, %r10
  39104. adcq %r11, %r11
  39105. sbbq $0x00, %rdx # rdx = -1 if the doubling carried out, else 0
  39106. andq %rdx, %rax
  39107. andq %rdx, %rcx
  39108. subq %rdx, %r8 # limb 0 of the modulus is -1, so the mask itself is subtracted
  39109. sbbq %rax, %r9
  39110. sbbq $0x00, %r10
  39111. sbbq %rcx, %r11
  39112. movq $0xffffffff, %rax # reload the constants destroyed by the masking
  39113. movq $0xffffffff00000001, %rcx
  39114. movq $0x00, %rdx
  39115. addq (%rsi), %r8 # step 2: value += a, read again from memory
  39116. adcq 8(%rsi), %r9
  39117. adcq 16(%rsi), %r10
  39118. adcq 24(%rsi), %r11
  39119. sbbq $0x00, %rdx
  39120. andq %rdx, %rax
  39121. andq %rdx, %rcx
  39122. subq %rdx, %r8
  39123. sbbq %rax, %r9
  39124. sbbq $0x00, %r10
  39125. sbbq %rcx, %r11
  39126. movq %r8, (%rdi) # stores come after the last read of a
  39127. movq %r9, 8(%rdi)
  39128. movq %r10, 16(%rdi)
  39129. movq %r11, 24(%rdi)
  39130. repz retq
  39131. #ifndef __APPLE__
  39132. .size sp_256_mont_tpl_4,.-sp_256_mont_tpl_4
  39133. #endif /* __APPLE__ */
  39134. /* Subtract two Montgomery form numbers: r = a - b, plus the modulus on borrow.
  39135. *
  39136. * r Result of subtraction, 4 limbs (rdi).
  39137. * a Number to subtract from in Montgomery form, 4 limbs (rsi).
  39138. * b Number to subtract with in Montgomery form, 4 limbs (rdx).
  39139. * m Modulus (prime). Not read: limbs -1, 0xffffffff, 0, 0xffffffff00000001 are built in.
  39140. */
  39141. #ifndef __APPLE__
  39142. .text
  39143. .globl sp_256_mont_sub_4
  39144. .type sp_256_mont_sub_4,@function
  39145. .align 16
  39146. sp_256_mont_sub_4:
  39147. #else
  39148. .section __TEXT,__text
  39149. .globl _sp_256_mont_sub_4
  39150. .p2align 4
  39151. _sp_256_mont_sub_4:
  39152. #endif /* __APPLE__ */
  39153. movq $0xffffffff, %rax # limb 1 of the built-in modulus
  39154. movq $0xffffffff00000001, %rcx # limb 3 of the built-in modulus
  39155. movq (%rsi), %r8 # r8-r11 = limbs of a
  39156. movq 8(%rsi), %r9
  39157. movq 16(%rsi), %r10
  39158. movq 24(%rsi), %r11
  39159. movq $0x00, %rsi # a is fully loaded; rsi becomes the borrow mask
  39160. subq (%rdx), %r8
  39161. sbbq 8(%rdx), %r9
  39162. sbbq 16(%rdx), %r10
  39163. sbbq 24(%rdx), %r11
  39164. sbbq $0x00, %rsi # rsi = -1 if the subtraction borrowed, else 0
  39165. andq %rsi, %rax
  39166. andq %rsi, %rcx
  39167. addq %rsi, %r8 # limb 0 of the modulus is -1, so the mask itself is added
  39168. adcq %rax, %r9
  39169. adcq $0x00, %r10 # limb 2 of the modulus is 0
  39170. adcq %rcx, %r11
  39171. movq %r8, (%rdi)
  39172. movq %r9, 8(%rdi)
  39173. movq %r10, 16(%rdi)
  39174. movq %r11, 24(%rdi)
  39175. repz retq
  39176. #ifndef __APPLE__
  39177. .size sp_256_mont_sub_4,.-sp_256_mont_sub_4
  39178. #endif /* __APPLE__ */
  39179. /* Halve a number mod the modulus: add the modulus when a is odd, then shift right one bit.
  39180. *
  39181. * r Result of division by 2, 4 limbs (rdi).
  39182. * a Number to divide, 4 limbs (rsi).
  39183. * m Modulus (prime). Not read: limbs -1, 0xffffffff, 0, 0xffffffff00000001 are built in.
  39184. */
  39185. #ifndef __APPLE__
  39186. .text
  39187. .globl sp_256_div2_4
  39188. .type sp_256_div2_4,@function
  39189. .align 16
  39190. sp_256_div2_4:
  39191. #else
  39192. .section __TEXT,__text
  39193. .globl _sp_256_div2_4
  39194. .p2align 4
  39195. _sp_256_div2_4:
  39196. #endif /* __APPLE__ */
  39197. movq (%rsi), %r8 # r8-r11 = limbs of a
  39198. movq 8(%rsi), %r9
  39199. movq 16(%rsi), %r10
  39200. movq 24(%rsi), %r11
  39201. movq $0xffffffff, %rax # limb 1 of the built-in modulus
  39202. movq $0xffffffff00000001, %rcx # limb 3 of the built-in modulus
  39203. movq %r8, %rdx
  39204. andq $0x01, %rdx # isolate the low bit of a
  39205. negq %rdx # rdx = -1 if a is odd, else 0
  39206. andq %rdx, %rax
  39207. andq %rdx, %rcx
  39208. addq %rdx, %r8 # limb 0 of the modulus is -1, so the mask itself is added
  39209. adcq %rax, %r9
  39210. adcq $0x00, %r10 # limb 2 of the modulus is 0
  39211. adcq %rcx, %r11
  39212. movq $0x00, %rdx # movq keeps the carry of the adcq above
  39213. adcq $0x00, %rdx # rdx = bit 256 of the sum
  39214. shrdq $0x01, %r9, %r8 # 257-bit right shift, one limb at a time
  39215. shrdq $0x01, %r10, %r9
  39216. shrdq $0x01, %r11, %r10
  39217. shrdq $0x01, %rdx, %r11 # top bit comes from the saved carry
  39218. movq %r8, (%rdi)
  39219. movq %r9, 8(%rdi)
  39220. movq %r10, 16(%rdi)
  39221. movq %r11, 24(%rdi)
  39222. repz retq
  39223. #ifndef __APPLE__
  39224. .size sp_256_div2_4,.-sp_256_div2_4
  39225. #endif /* __APPLE__ */
  39226. #ifndef WC_NO_CACHE_RESISTANT
  39227. /* Touch each possible point that could be being copied: scan entries 1..32 and keep only the one whose number equals idx.
  39228. *
  39229. * r Point to copy into (rdi). 32 bytes are written at each of byte offsets 0, 64 and 128.
  39230. * table Table - start of the entries to access (rsi). Entries are 0xc8 bytes apart; entry 0 is never read.
  39231. * idx Index of point to retrieve (edx). If no entry number in 1..32 matches, zeros are written.
  39232. */
  39233. #ifndef __APPLE__
  39234. .text
  39235. .globl sp_256_get_point_33_4
  39236. .type sp_256_get_point_33_4,@function
  39237. .align 16
  39238. sp_256_get_point_33_4:
  39239. #else
  39240. .section __TEXT,__text
  39241. .globl _sp_256_get_point_33_4
  39242. .p2align 4
  39243. _sp_256_get_point_33_4:
  39244. #endif /* __APPLE__ */
  39245. movq $0x01, %rax
  39246. movd %edx, %xmm13 # xmm13 = idx, broadcast to all lanes below
  39247. addq $0xc8, %rsi # skip entry 0; scanning starts at entry 1
  39248. movd %eax, %xmm15 # xmm15 = 1, broadcast below; the per-iteration increment
  39249. movq $32, %rax # rax = number of entries to scan
  39250. pshufd $0x00, %xmm15, %xmm15
  39251. pshufd $0x00, %xmm13, %xmm13
  39252. pxor %xmm14, %xmm14
  39253. pxor %xmm0, %xmm0 # xmm0-xmm5 accumulate the selected point, starting from zero
  39254. pxor %xmm1, %xmm1
  39255. pxor %xmm2, %xmm2
  39256. pxor %xmm3, %xmm3
  39257. pxor %xmm4, %xmm4
  39258. pxor %xmm5, %xmm5
  39259. movdqa %xmm15, %xmm14 # xmm14 = number of the entry rsi points at, starts at 1
  39260. L_256_get_point_33_4_start:
  39261. movdqa %xmm14, %xmm12
  39262. paddd %xmm15, %xmm14 # advance the entry number for the next iteration
  39263. pcmpeqd %xmm13, %xmm12 # xmm12 = all ones if this entry number equals idx, else zero
  39264. movdqu (%rsi), %xmm6 # every entry is loaded whether or not it is selected
  39265. movdqu 16(%rsi), %xmm7
  39266. movdqu 64(%rsi), %xmm8
  39267. movdqu 80(%rsi), %xmm9
  39268. movdqu 128(%rsi), %xmm10
  39269. movdqu 144(%rsi), %xmm11
  39270. addq $0xc8, %rsi # step to the next entry
  39271. pand %xmm12, %xmm6 # keep the data only when the mask is set
  39272. pand %xmm12, %xmm7
  39273. pand %xmm12, %xmm8
  39274. pand %xmm12, %xmm9
  39275. pand %xmm12, %xmm10
  39276. pand %xmm12, %xmm11
  39277. por %xmm6, %xmm0 # merge into the accumulators
  39278. por %xmm7, %xmm1
  39279. por %xmm8, %xmm2
  39280. por %xmm9, %xmm3
  39281. por %xmm10, %xmm4
  39282. por %xmm11, %xmm5
  39283. decq %rax
  39284. jnz L_256_get_point_33_4_start
  39285. movdqu %xmm0, (%rdi) # write back using the same offsets the data was read from
  39286. movdqu %xmm1, 16(%rdi)
  39287. movdqu %xmm2, 64(%rdi)
  39288. movdqu %xmm3, 80(%rdi)
  39289. movdqu %xmm4, 128(%rdi)
  39290. movdqu %xmm5, 144(%rdi)
  39291. repz retq
  39292. #ifndef __APPLE__
  39293. .size sp_256_get_point_33_4,.-sp_256_get_point_33_4
  39294. #endif /* __APPLE__ */
  39295. /* Touch each possible point that could be being copied: AVX2 scan of entries 1..32 keeping only the one whose number equals idx.
  39296. *
  39297. * r Point to copy into (rdi). 32 bytes are written at each of byte offsets 0, 64 and 128.
  39298. * table Table - start of the entries to access (rsi). Entries are 0xc8 bytes apart; entry 0 is never read.
  39299. * idx Index of point to retrieve (edx). If no entry number in 1..32 matches, zeros are written.
  39300. */
  39301. #ifndef __APPLE__
  39302. .text
  39303. .globl sp_256_get_point_33_avx2_4
  39304. .type sp_256_get_point_33_avx2_4,@function
  39305. .align 16
  39306. sp_256_get_point_33_avx2_4:
  39307. #else
  39308. .section __TEXT,__text
  39309. .globl _sp_256_get_point_33_avx2_4
  39310. .p2align 4
  39311. _sp_256_get_point_33_avx2_4:
  39312. #endif /* __APPLE__ */
  39313. movq $0x01, %rax
  39314. movd %edx, %xmm7 # low lane of ymm7 = idx
  39315. addq $0xc8, %rsi # skip entry 0; scanning starts at entry 1
  39316. movd %eax, %xmm9 # low lane of ymm9 = 1
  39317. movq $32, %rax # rax = number of entries to scan
  39318. vpxor %ymm8, %ymm8, %ymm8 # all-zero index vector for the broadcasts below
  39319. vpermd %ymm7, %ymm8, %ymm7 # every lane takes element 0: broadcast idx
  39320. vpermd %ymm9, %ymm8, %ymm9 # broadcast 1; the per-iteration increment
  39321. vpxor %ymm0, %ymm0, %ymm0 # ymm0-ymm2 accumulate the selected point, starting from zero
  39322. vpxor %ymm1, %ymm1, %ymm1
  39323. vpxor %ymm2, %ymm2, %ymm2
  39324. vmovdqa %ymm9, %ymm8 # ymm8 = number of the entry rsi points at, starts at 1
  39325. L_256_get_point_33_avx2_4_start:
  39326. vpcmpeqd %ymm7, %ymm8, %ymm6 # ymm6 = all ones if this entry number equals idx, else zero
  39327. vpaddd %ymm9, %ymm8, %ymm8 # advance the entry number
  39328. vmovdqu (%rsi), %ymm3 # every entry is loaded whether or not it is selected
  39329. vmovdqu 64(%rsi), %ymm4
  39330. vmovdqu 128(%rsi), %ymm5
  39331. addq $0xc8, %rsi # step to the next entry
  39332. vpand %ymm6, %ymm3, %ymm3 # keep the data only when the mask is set
  39333. vpand %ymm6, %ymm4, %ymm4
  39334. vpand %ymm6, %ymm5, %ymm5
  39335. vpor %ymm3, %ymm0, %ymm0 # merge into the accumulators
  39336. vpor %ymm4, %ymm1, %ymm1
  39337. vpor %ymm5, %ymm2, %ymm2
  39338. decq %rax
  39339. jnz L_256_get_point_33_avx2_4_start
  39340. vmovdqu %ymm0, (%rdi) # write back using the same offsets the data was read from
  39341. vmovdqu %ymm1, 64(%rdi)
  39342. vmovdqu %ymm2, 128(%rdi)
  39343. repz retq
  39344. #ifndef __APPLE__
  39345. .size sp_256_get_point_33_avx2_4,.-sp_256_get_point_33_avx2_4
  39346. #endif /* __APPLE__ */
  39347. #endif /* !WC_NO_CACHE_RESISTANT */
  39348. /* Multiply two Montgomery form numbers mod the modulus (prime).
  39349. * (r = a * b mod m) using mulx with two interleaved adcx/adox carry chains.
  39350. *
  39351. * r Result of multiplication, 4 limbs (rdi).
  39352. * a First number to multiply in Montgomery form, 4 limbs (rsi).
  39353. * b Second number to multiply in Montgomery form, 4 limbs (rdx on entry, kept in rbp).
  39354. * m Modulus (prime). No register is read as m: the reduction constants are built in.
  39355. * mp Montgomery multiplier. No register is read as mp either.
  39356. */
  39357. #ifndef __APPLE__
  39358. .text
  39359. .globl sp_256_mont_mul_avx2_4
  39360. .type sp_256_mont_mul_avx2_4,@function
  39361. .align 16
  39362. sp_256_mont_mul_avx2_4:
  39363. #else
  39364. .section __TEXT,__text
  39365. .globl _sp_256_mont_mul_avx2_4
  39366. .p2align 4
  39367. _sp_256_mont_mul_avx2_4:
  39368. #endif /* __APPLE__ */
  39369. pushq %rbx # rbx, rbp and r12-r15 are callee-saved and all used below
  39370. pushq %rbp
  39371. pushq %r12
  39372. pushq %r13
  39373. pushq %r14
  39374. pushq %r15
  39375. movq %rdx, %rbp # free rdx: mulx takes its implicit multiplicand from rdx
  39376. # A[0] * B[0]
  39377. movq (%rbp), %rdx
  39378. mulxq (%rsi), %r8, %r9 # product limbs accumulate in r8 (lowest) .. r15 (highest)
  39379. # A[2] * B[0]
  39380. mulxq 16(%rsi), %r10, %r11
  39381. # A[1] * B[0]
  39382. mulxq 8(%rsi), %rax, %rcx
  39383. xorq %r15, %r15 # r15 = 0 and CF = OF = 0: starts both carry chains
  39384. adcxq %rax, %r9 # adcx carries through CF only
  39385. # A[1] * B[3]
  39386. movq 24(%rbp), %rdx
  39387. mulxq 8(%rsi), %r12, %r13
  39388. adcxq %rcx, %r10
  39389. # A[0] * B[1]
  39390. movq 8(%rbp), %rdx
  39391. mulxq (%rsi), %rax, %rcx
  39392. adoxq %rax, %r9 # adox carries through OF only, independent of the adcx chain
  39393. # A[2] * B[1]
  39394. mulxq 16(%rsi), %rax, %r14
  39395. adoxq %rcx, %r10
  39396. adcxq %rax, %r11
  39397. # A[1] * B[2]
  39398. movq 16(%rbp), %rdx
  39399. mulxq 8(%rsi), %rax, %rcx
  39400. adcxq %r14, %r12
  39401. adoxq %rax, %r11
  39402. adcxq %r15, %r13 # r15 is zero here: just absorbs the CF chain
  39403. adoxq %rcx, %r12
  39404. # A[0] * B[2]
  39405. mulxq (%rsi), %rax, %rcx
  39406. adoxq %r15, %r13 # absorb the OF chain
  39407. xorq %r14, %r14 # r14 = 0 and both carry flags cleared for the next pass
  39408. adcxq %rax, %r10
  39409. # A[1] * B[1]
  39410. movq 8(%rbp), %rdx
  39411. mulxq 8(%rsi), %rdx, %rax # low half lands in rdx, so rdx is reloaded before the next mulx
  39412. adcxq %rcx, %r11
  39413. adoxq %rdx, %r10
  39414. # A[3] * B[1]
  39415. movq 8(%rbp), %rdx
  39416. adoxq %rax, %r11
  39417. mulxq 24(%rsi), %rax, %rcx
  39418. adcxq %rax, %r12
  39419. # A[2] * B[2]
  39420. movq 16(%rbp), %rdx
  39421. mulxq 16(%rsi), %rdx, %rax
  39422. adcxq %rcx, %r13
  39423. adoxq %rdx, %r12
  39424. # A[3] * B[3]
  39425. movq 24(%rbp), %rdx
  39426. adoxq %rax, %r13
  39427. mulxq 24(%rsi), %rax, %rcx
  39428. adoxq %r15, %r14 # r15 is still zero: r14 receives the OF chain carry
  39429. adcxq %rax, %r14
  39430. # A[0] * B[3]
  39431. mulxq (%rsi), %rdx, %rax
  39432. adcxq %rcx, %r15
  39433. xorq %rcx, %rcx # rcx = 0 and both carry flags cleared for the last pass
  39434. adcxq %rdx, %r11
  39435. # A[3] * B[0]
  39436. movq 24(%rsi), %rdx # operands swap roles: rdx = A[3], memory operand comes from b
  39437. adcxq %rax, %r12
  39438. mulxq (%rbp), %rbx, %rax
  39439. adoxq %rbx, %r11
  39440. adoxq %rax, %r12
  39441. # A[3] * B[2]
  39442. mulxq 16(%rbp), %rdx, %rax
  39443. adcxq %rdx, %r13
  39444. # A[2] * B[3]
  39445. movq 24(%rbp), %rdx
  39446. adcxq %rax, %r14
  39447. mulxq 16(%rsi), %rax, %rdx
  39448. adcxq %rcx, %r15 # rcx is zero: absorbs the CF chain
  39449. adoxq %rax, %r13
  39450. adoxq %rdx, %r14
  39451. adoxq %rcx, %r15 # absorbs the OF chain; r8..r15 now hold the 8-limb product
  39452. # Start Reduction
  39453. # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  39454. # - a[0] << 32 << 192
  39455. # + (a[0] * 2) << 192
  39456. movq %r8, %rax # mu is assembled in rax, rsi, rbp, rdx (low to high)
  39457. movq %r11, %rdx
  39458. addq %r8, %rdx
  39459. movq %r9, %rsi # a and b are no longer needed, so rsi and rbp are reused
  39460. addq %r8, %rdx
  39461. movq %r10, %rbp
  39462. # a[0]-a[2] << 32
  39463. shlq $32, %r8
  39464. shldq $32, %rsi, %r10
  39465. shldq $32, %rax, %r9
  39466. # - a[0] << 32 << 192
  39467. subq %r8, %rdx
  39468. # + a[0]-a[2] << 32 << 64
  39469. addq %r8, %rsi
  39470. adcq %r9, %rbp
  39471. adcq %r10, %rdx
  39472. # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
  39473. # a += mu << 256
  39474. xorq %r8, %r8 # r8 becomes the running overflow word; each sbbq below subtracts a carry
  39475. addq %rax, %r12
  39476. adcq %rsi, %r13
  39477. adcq %rbp, %r14
  39478. adcq %rdx, %r15
  39479. sbbq $0x00, %r8
  39480. # a += mu << 192
  39481. addq %rax, %r11
  39482. adcq %rsi, %r12
  39483. adcq %rbp, %r13
  39484. adcq %rdx, %r14
  39485. adcq $0x00, %r15
  39486. sbbq $0x00, %r8
  39487. # mu <<= 32
  39488. movq %rdx, %rcx
  39489. shldq $32, %rbp, %rdx
  39490. shldq $32, %rsi, %rbp
  39491. shldq $32, %rax, %rsi
  39492. shrq $32, %rcx # rcx = the 32 bits shifted out of the top limb
  39493. shlq $32, %rax
  39494. # a += (mu << 32) << 64
  39495. addq %rbp, %r11
  39496. adcq %rdx, %r12
  39497. adcq %rcx, %r13
  39498. adcq $0x00, %r14
  39499. adcq $0x00, %r15
  39500. sbbq $0x00, %r8
  39501. # a -= (mu << 32) << 192
  39502. subq %rax, %r11
  39503. sbbq %rsi, %r12
  39504. sbbq %rbp, %r13
  39505. sbbq %rdx, %r14
  39506. sbbq %rcx, %r15
  39507. adcq $0x00, %r8 # a borrow here is added back onto the overflow word
  39508. movq $0xffffffff, %rax
  39509. movq $0xffffffff00000001, %rsi
  39510. # mask m and sub from result if overflow
  39511. # m[0] = -1, so m[0] & mask is r8 itself; rax = 0xffffffff & mask
  39512. andq %r8, %rax
  39513. # m[2] = 0, so that limb only subtracts the borrow; rsi = 0xffffffff00000001 & mask
  39514. andq %r8, %rsi
  39515. subq %r8, %r12
  39516. sbbq %rax, %r13
  39517. sbbq $0x00, %r14
  39518. sbbq %rsi, %r15
  39519. movq %r12, (%rdi) # result is the upper four limbs r12..r15
  39520. movq %r13, 8(%rdi)
  39521. movq %r14, 16(%rdi)
  39522. movq %r15, 24(%rdi)
  39523. popq %r15
  39524. popq %r14
  39525. popq %r13
  39526. popq %r12
  39527. popq %rbp
  39528. popq %rbx
  39529. repz retq
  39530. #ifndef __APPLE__
  39531. .size sp_256_mont_mul_avx2_4,.-sp_256_mont_mul_avx2_4
  39532. #endif /* __APPLE__ */
  39533. /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
  39534. *
  39535. * r Result of squaring, 4 limbs (rdi).
  39536. * a Number to square in Montgomery form, 4 limbs (rsi).
  39537. * m Modulus (prime). No register is read as m: the reduction constants are built in.
  39538. * mp Montgomery multiplier. No register is read as mp either.
  39539. */
  39540. #ifndef __APPLE__
  39541. .text
  39542. .globl sp_256_mont_sqr_avx2_4
  39543. .type sp_256_mont_sqr_avx2_4,@function
  39544. .align 16
  39545. sp_256_mont_sqr_avx2_4:
  39546. #else
  39547. .section __TEXT,__text
  39548. .globl _sp_256_mont_sqr_avx2_4
  39549. .p2align 4
  39550. _sp_256_mont_sqr_avx2_4:
  39551. #endif /* __APPLE__ */
  39552. pushq %r12 # r12-r15 and rbx are callee-saved and all used below
  39553. pushq %r13
  39554. pushq %r14
  39555. pushq %r15
  39556. pushq %rbx
  39557. # A[0] * A[1]
  39558. movq (%rsi), %rdx # mulx takes its implicit multiplicand from rdx
  39559. movq 16(%rsi), %r15 # A[2] is parked in r15 until it is needed in rdx
  39560. mulxq 8(%rsi), %r9, %r10 # cross products accumulate in r9 .. r14
  39561. # A[0] * A[3]
  39562. mulxq 24(%rsi), %r11, %r12
  39563. # A[2] * A[1]
  39564. movq %r15, %rdx
  39565. mulxq 8(%rsi), %rcx, %rbx
  39566. # A[2] * A[3]
  39567. mulxq 24(%rsi), %r13, %r14
  39568. xorq %r15, %r15 # r15 = 0 and CF = OF = 0: starts both carry chains
  39569. adoxq %rcx, %r11 # adox carries through OF only
  39570. adoxq %rbx, %r12
  39571. # A[2] * A[0]
  39572. mulxq (%rsi), %rcx, %rbx
  39573. # A[1] * A[3]
  39574. movq 8(%rsi), %rdx
  39575. adoxq %r15, %r13 # r15 is zero: propagates the OF chain
  39576. mulxq 24(%rsi), %rax, %r8
  39577. adcxq %rcx, %r10 # adcx carries through CF only
  39578. adoxq %r15, %r14
  39579. adcxq %rbx, %r11
  39580. adcxq %rax, %r12
  39581. adcxq %r8, %r13
  39582. adcxq %r15, %r14
  39583. # Double with Carry Flag
  39584. xorq %r15, %r15 # r15 = 0 and both carry flags cleared again
  39585. # A[0] * A[0]
  39586. movq (%rsi), %rdx
  39587. mulxq %rdx, %r8, %rax # r8 = lowest limb of the square
  39588. adcxq %r9, %r9 # CF chain doubles the cross products
  39589. adcxq %r10, %r10
  39590. adoxq %rax, %r9 # OF chain adds in the diagonal squares
  39591. # A[1] * A[1]
  39592. movq 8(%rsi), %rdx
  39593. mulxq %rdx, %rcx, %rbx
  39594. adcxq %r11, %r11
  39595. adoxq %rcx, %r10
  39596. # A[2] * A[2]
  39597. movq 16(%rsi), %rdx
  39598. mulxq %rdx, %rax, %rcx
  39599. adcxq %r12, %r12
  39600. adoxq %rbx, %r11
  39601. adcxq %r13, %r13
  39602. adoxq %rax, %r12
  39603. adcxq %r14, %r14
  39604. # A[3] * A[3]
  39605. movq 24(%rsi), %rdx
  39606. mulxq %rdx, %rax, %rbx
  39607. adoxq %rcx, %r13
  39608. adcxq %r15, %r15 # r15 was zero, so it becomes the carry out of the doubling
  39609. adoxq %rax, %r14
  39610. adoxq %rbx, %r15 # r8..r15 now hold the 8-limb square
  39611. # Start Reduction
  39612. # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
  39613. # - a[0] << 32 << 192
  39614. # + (a[0] * 2) << 192
  39615. movq %r8, %rax # mu is assembled in rax, rsi, rcx, rdx (low to high)
  39616. movq %r11, %rdx
  39617. addq %r8, %rdx
  39618. movq %r9, %rsi # a is no longer needed, so rsi is reused
  39619. addq %r8, %rdx
  39620. movq %r10, %rcx
  39621. # a[0]-a[2] << 32
  39622. shlq $32, %r8
  39623. shldq $32, %rsi, %r10
  39624. shldq $32, %rax, %r9
  39625. # - a[0] << 32 << 192
  39626. subq %r8, %rdx
  39627. # + a[0]-a[2] << 32 << 64
  39628. addq %r8, %rsi
  39629. adcq %r9, %rcx
  39630. adcq %r10, %rdx
  39631. # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
  39632. # a += mu << 256
  39633. xorq %r8, %r8 # r8 becomes the running overflow word; each sbbq below subtracts a carry
  39634. addq %rax, %r12
  39635. adcq %rsi, %r13
  39636. adcq %rcx, %r14
  39637. adcq %rdx, %r15
  39638. sbbq $0x00, %r8
  39639. # a += mu << 192
  39640. addq %rax, %r11
  39641. adcq %rsi, %r12
  39642. adcq %rcx, %r13
  39643. adcq %rdx, %r14
  39644. adcq $0x00, %r15
  39645. sbbq $0x00, %r8
  39646. # mu <<= 32
  39647. movq %rdx, %rbx
  39648. shldq $32, %rcx, %rdx
  39649. shldq $32, %rsi, %rcx
  39650. shldq $32, %rax, %rsi
  39651. shrq $32, %rbx # rbx = the 32 bits shifted out of the top limb
  39652. shlq $32, %rax
  39653. # a += (mu << 32) << 64
  39654. addq %rcx, %r11
  39655. adcq %rdx, %r12
  39656. adcq %rbx, %r13
  39657. adcq $0x00, %r14
  39658. adcq $0x00, %r15
  39659. sbbq $0x00, %r8
  39660. # a -= (mu << 32) << 192
  39661. subq %rax, %r11
  39662. sbbq %rsi, %r12
  39663. sbbq %rcx, %r13
  39664. sbbq %rdx, %r14
  39665. sbbq %rbx, %r15
  39666. adcq $0x00, %r8 # a borrow here is added back onto the overflow word
  39667. movq $0xffffffff, %rax
  39668. movq $0xffffffff00000001, %rsi
  39669. # mask m and sub from result if overflow
  39670. # m[0] = -1, so m[0] & mask is r8 itself; rax = 0xffffffff & mask
  39671. andq %r8, %rax
  39672. # m[2] = 0, so that limb only subtracts the borrow; rsi = 0xffffffff00000001 & mask
  39673. andq %r8, %rsi
  39674. subq %r8, %r12
  39675. sbbq %rax, %r13
  39676. sbbq $0x00, %r14
  39677. sbbq %rsi, %r15
  39678. movq %r12, (%rdi) # result is the upper four limbs r12..r15
  39679. movq %r13, 8(%rdi)
  39680. movq %r14, 16(%rdi)
  39681. movq %r15, 24(%rdi)
  39682. popq %rbx
  39683. popq %r15
  39684. popq %r14
  39685. popq %r13
  39686. popq %r12
  39687. repz retq
  39688. #ifndef __APPLE__
  39689. .size sp_256_mont_sqr_avx2_4,.-sp_256_mont_sqr_avx2_4
  39690. #endif /* __APPLE__ */
  39691. #ifndef WC_NO_CACHE_RESISTANT
  39692. /* Touch each possible entry that could be being copied: scan entries 1..63 and keep only the one whose number equals idx.
  39693. *
  39694. * r Point to copy into (rdi). 32 bytes are written at byte offset 0 and 32 bytes at byte offset 64.
  39695. * table Table - start of the entries to access (rsi). Entries are 0x40 bytes apart; entry 0 is never read.
  39696. * idx Index of entry to retrieve (edx). If no entry number in 1..63 matches, zeros are written.
  39697. */
  39698. #ifndef __APPLE__
  39699. .text
  39700. .globl sp_256_get_entry_64_4
  39701. .type sp_256_get_entry_64_4,@function
  39702. .align 16
  39703. sp_256_get_entry_64_4:
  39704. #else
  39705. .section __TEXT,__text
  39706. .globl _sp_256_get_entry_64_4
  39707. .p2align 4
  39708. _sp_256_get_entry_64_4:
  39709. #endif /* __APPLE__ */
  39710. movq $0x01, %rax
  39711. movd %edx, %xmm9 # xmm9 = idx, broadcast to all lanes below
  39712. addq $0x40, %rsi # skip entry 0; scanning starts at entry 1
  39713. movd %eax, %xmm11 # xmm11 = 1, broadcast below; the per-iteration increment
  39714. movq $63, %rax # rax = number of entries to scan
  39715. pshufd $0x00, %xmm11, %xmm11
  39716. pshufd $0x00, %xmm9, %xmm9
  39717. pxor %xmm10, %xmm10
  39718. pxor %xmm0, %xmm0 # xmm0-xmm3 accumulate the selected entry, starting from zero
  39719. pxor %xmm1, %xmm1
  39720. pxor %xmm2, %xmm2
  39721. pxor %xmm3, %xmm3
  39722. movdqa %xmm11, %xmm10 # xmm10 = number of the entry rsi points at, starts at 1
  39723. L_256_get_entry_64_4_start:
  39724. movdqa %xmm10, %xmm8
  39725. paddd %xmm11, %xmm10 # advance the entry number for the next iteration
  39726. pcmpeqd %xmm9, %xmm8 # xmm8 = all ones if this entry number equals idx, else zero
  39727. movdqa (%rsi), %xmm4 # NOTE(review): movdqa faults on an unaligned address - confirm table is 16-byte aligned
  39728. movdqa 16(%rsi), %xmm5
  39729. movdqa 32(%rsi), %xmm6
  39730. movdqa 48(%rsi), %xmm7
  39731. addq $0x40, %rsi # step to the next entry
  39732. pand %xmm8, %xmm4 # keep the data only when the mask is set
  39733. pand %xmm8, %xmm5
  39734. pand %xmm8, %xmm6
  39735. pand %xmm8, %xmm7
  39736. por %xmm4, %xmm0 # merge into the accumulators
  39737. por %xmm5, %xmm1
  39738. por %xmm6, %xmm2
  39739. por %xmm7, %xmm3
  39740. decq %rax
  39741. jnz L_256_get_entry_64_4_start
  39742. movdqu %xmm0, (%rdi) # entry bytes 0..31 go to r at offset 0
  39743. movdqu %xmm1, 16(%rdi)
  39744. movdqu %xmm2, 64(%rdi) # entry bytes 32..63 go to r at offset 64
  39745. movdqu %xmm3, 80(%rdi)
  39746. repz retq
  39747. #ifndef __APPLE__
  39748. .size sp_256_get_entry_64_4,.-sp_256_get_entry_64_4
  39749. #endif /* __APPLE__ */
  39750. /* Touch each possible entry that could be being copied: AVX2 scan of entries 1..63 keeping only the one whose number equals idx.
  39751. *
  39752. * r Point to copy into (rdi). 32 bytes are written at byte offset 0 and 32 bytes at byte offset 64.
  39753. * table Table - start of the entries to access (rsi). 64 entries, 0x40 bytes apart; entry 0 is never read.
  39754. * idx Index of entry to retrieve (edx). If no entry number in 1..63 matches, zeros are written.
  39755. */
  39756. #ifndef __APPLE__
  39757. .text
  39758. .globl sp_256_get_entry_64_avx2_4
  39759. .type sp_256_get_entry_64_avx2_4,@function
  39760. .align 16
  39761. sp_256_get_entry_64_avx2_4:
  39762. #else
  39763. .section __TEXT,__text
  39764. .globl _sp_256_get_entry_64_avx2_4
  39765. .p2align 4
  39766. _sp_256_get_entry_64_avx2_4:
  39767. #endif /* __APPLE__ */
  39768. movq $0x01, %rax
  39769. movd %edx, %xmm5 # low lane of ymm5 = idx
  39770. addq $0x40, %rsi # skip entry 0; scanning starts at entry 1
  39771. movd %eax, %xmm7 # low lane of ymm7 = 1
  39772. movq $63, %rax # 63 entries (1..63), as in sp_256_get_entry_64_4; 64 would load past the end of the table
  39773. vpxor %ymm6, %ymm6, %ymm6 # all-zero index vector for the broadcasts below
  39774. vpermd %ymm5, %ymm6, %ymm5 # every lane takes element 0: broadcast idx
  39775. vpermd %ymm7, %ymm6, %ymm7 # broadcast 1; the per-iteration increment
  39776. vpxor %ymm0, %ymm0, %ymm0 # ymm0-ymm1 accumulate the selected entry, starting from zero
  39777. vpxor %ymm1, %ymm1, %ymm1
  39778. vmovdqa %ymm7, %ymm6 # ymm6 = number of the entry rsi points at, starts at 1
  39779. L_256_get_entry_64_avx2_4_start:
  39780. vpcmpeqd %ymm5, %ymm6, %ymm4 # ymm4 = all ones if this entry number equals idx, else zero
  39781. vpaddd %ymm7, %ymm6, %ymm6 # advance the entry number
  39782. vmovdqu (%rsi), %ymm2 # every entry is loaded whether or not it is selected
  39783. vmovdqu 32(%rsi), %ymm3
  39784. addq $0x40, %rsi # step to the next entry
  39785. vpand %ymm4, %ymm2, %ymm2 # keep the data only when the mask is set
  39786. vpand %ymm4, %ymm3, %ymm3
  39787. vpor %ymm2, %ymm0, %ymm0 # merge into the accumulators
  39788. vpor %ymm3, %ymm1, %ymm1
  39789. decq %rax
  39790. jnz L_256_get_entry_64_avx2_4_start
  39791. vmovdqu %ymm0, (%rdi) # entry bytes 0..31 go to r at offset 0
  39792. vmovdqu %ymm1, 64(%rdi) # entry bytes 32..63 go to r at offset 64
  39793. repz retq
  39794. #ifndef __APPLE__
  39795. .size sp_256_get_entry_64_avx2_4,.-sp_256_get_entry_64_avx2_4
  39796. #endif /* __APPLE__ */
  39797. #endif /* !WC_NO_CACHE_RESISTANT */
  39798. #ifndef WC_NO_CACHE_RESISTANT
  39799. /* Touch each possible entry that could be being copied: scan entries 1..64 and keep only the one whose number equals idx.
  39800. *
  39801. * r Point to copy into (rdi). 32 bytes are written at byte offset 0 and 32 bytes at byte offset 64.
  39802. * table Table - start of the entries to access (rsi). Entries are 0x40 bytes apart; entry 0 is never read.
  39803. * idx Index of entry to retrieve (edx). If no entry number in 1..64 matches, zeros are written.
  39804. */
  39805. #ifndef __APPLE__
  39806. .text
  39807. .globl sp_256_get_entry_65_4
  39808. .type sp_256_get_entry_65_4,@function
  39809. .align 16
  39810. sp_256_get_entry_65_4:
  39811. #else
  39812. .section __TEXT,__text
  39813. .globl _sp_256_get_entry_65_4
  39814. .p2align 4
  39815. _sp_256_get_entry_65_4:
  39816. #endif /* __APPLE__ */
  39817. movq $0x01, %rax
  39818. movd %edx, %xmm9 # xmm9 = idx, broadcast to all lanes below
  39819. addq $0x40, %rsi # skip entry 0; scanning starts at entry 1
  39820. movd %eax, %xmm11 # xmm11 = 1, broadcast below; the per-iteration increment
  39821. movq $0x40, %rax # rax = number of entries to scan (64)
  39822. pshufd $0x00, %xmm11, %xmm11
  39823. pshufd $0x00, %xmm9, %xmm9
  39824. pxor %xmm10, %xmm10
  39825. pxor %xmm0, %xmm0 # xmm0-xmm3 accumulate the selected entry, starting from zero
  39826. pxor %xmm1, %xmm1
  39827. pxor %xmm2, %xmm2
  39828. pxor %xmm3, %xmm3
  39829. movdqa %xmm11, %xmm10 # xmm10 = number of the entry rsi points at, starts at 1
  39830. L_256_get_entry_65_4_start:
  39831. movdqa %xmm10, %xmm8
  39832. paddd %xmm11, %xmm10 # advance the entry number for the next iteration
  39833. pcmpeqd %xmm9, %xmm8 # xmm8 = all ones if this entry number equals idx, else zero
  39834. movdqa (%rsi), %xmm4 # NOTE(review): movdqa faults on an unaligned address - confirm table is 16-byte aligned
  39835. movdqa 16(%rsi), %xmm5
  39836. movdqa 32(%rsi), %xmm6
  39837. movdqa 48(%rsi), %xmm7
  39838. addq $0x40, %rsi # step to the next entry
  39839. pand %xmm8, %xmm4 # keep the data only when the mask is set
  39840. pand %xmm8, %xmm5
  39841. pand %xmm8, %xmm6
  39842. pand %xmm8, %xmm7
  39843. por %xmm4, %xmm0 # merge into the accumulators
  39844. por %xmm5, %xmm1
  39845. por %xmm6, %xmm2
  39846. por %xmm7, %xmm3
  39847. decq %rax
  39848. jnz L_256_get_entry_65_4_start
  39849. movdqu %xmm0, (%rdi) # entry bytes 0..31 go to r at offset 0
  39850. movdqu %xmm1, 16(%rdi)
  39851. movdqu %xmm2, 64(%rdi) # entry bytes 32..63 go to r at offset 64
  39852. movdqu %xmm3, 80(%rdi)
  39853. repz retq
  39854. #ifndef __APPLE__
  39855. .size sp_256_get_entry_65_4,.-sp_256_get_entry_65_4
  39856. #endif /* __APPLE__ */
  39857. /* Touch each possible entry that could be being copied.
  39858. *
  * AVX2 variant. Walks entries 1..64 of the table (64 bytes each, entry 0 is
  * skipped), ANDs each one with an all-ones/all-zero compare mask and ORs it
  * into the result, so the same loads are issued whatever idx is. An idx that
  * matches none of 1..64 (including 0) produces an all-zero result.
  * The loop runs 64 times: starting at entry 1, a 65th pass would read 64
  * bytes beyond entry 64.
  *
  39859. * r Point to copy into (rdi). Bytes 0-31 and 64-95 are written.
  39860. * table Table - start of the entries to access (rsi).
  39861. * idx Index of entry to retrieve (edx).
  39862. */
  39863. #ifndef __APPLE__
  39864. .text
  39865. .globl sp_256_get_entry_65_avx2_4
  39866. .type sp_256_get_entry_65_avx2_4,@function
  39867. .align 16
  39868. sp_256_get_entry_65_avx2_4:
  39869. #else
  39870. .section __TEXT,__text
  39871. .globl _sp_256_get_entry_65_avx2_4
  39872. .p2align 4
  39873. _sp_256_get_entry_65_avx2_4:
  39874. #endif /* __APPLE__ */
  39875. movq $0x01, %rax
  39876. movd %edx, %xmm5
  39877. addq $0x40, %rsi # start at entry 1
  39878. movd %eax, %xmm7
  39879. movq $0x40, %rax # 64 iterations: entries 1..64 (same count as sp_256_get_entry_65_4)
  39880. vpxor %ymm6, %ymm6, %ymm6 # all-zero permute indices: select dword 0 for every lane
  39881. vpermd %ymm5, %ymm6, %ymm5 # ymm5 = idx in every 32-bit lane
  39882. vpermd %ymm7, %ymm6, %ymm7 # ymm7 = 1 in every 32-bit lane (index step)
  39883. vpxor %ymm0, %ymm0, %ymm0
  39884. vpxor %ymm1, %ymm1, %ymm1
  39885. vmovdqa %ymm7, %ymm6 # ymm6 = index of the entry about to be read (1)
  39886. L_256_get_entry_65_avx2_4_start:
  39887. vpcmpeqd %ymm5, %ymm6, %ymm4 # ymm4 = all ones when this entry is the requested one
  39888. vpaddd %ymm7, %ymm6, %ymm6 # advance candidate index
  39889. vmovdqu (%rsi), %ymm2
  39890. vmovdqu 32(%rsi), %ymm3
  39891. addq $0x40, %rsi
  39892. vpand %ymm4, %ymm2, %ymm2
  39893. vpand %ymm4, %ymm3, %ymm3
  39894. vpor %ymm2, %ymm0, %ymm0
  39895. vpor %ymm3, %ymm1, %ymm1
  39896. decq %rax
  39897. jnz L_256_get_entry_65_avx2_4_start
  39898. vmovdqu %ymm0, (%rdi) # first 32 bytes of the entry -> r bytes 0-31
  39899. vmovdqu %ymm1, 64(%rdi) # last 32 bytes of the entry -> r bytes 64-95
  39900. repz retq
  39901. #ifndef __APPLE__
  39902. .size sp_256_get_entry_65_avx2_4,.-sp_256_get_entry_65_avx2_4
  39903. #endif /* __APPLE__ */
  39904. #endif /* !WC_NO_CACHE_RESISTANT */
  39905. /* Add 1 to a. (a = a + 1)
  39906. *
  * The increment is applied in place, word by word, with the carry rippling
  * from byte offset 0 up to byte offset 24. The carry out of the last word is
  * not returned.
  *
  39907. * a A single precision integer (rdi): four 64-bit words, least significant first.
  39908. */
  39909. #ifndef __APPLE__
  39910. .text
  39911. .globl sp_256_add_one_4
  39912. .type sp_256_add_one_4,@function
  39913. .align 16
  39914. sp_256_add_one_4:
  39915. #else
  39916. .section __TEXT,__text
  39917. .globl _sp_256_add_one_4
  39918. .p2align 4
  39919. _sp_256_add_one_4:
  39920. #endif /* __APPLE__ */
  39921. addq $0x01, (%rdi) # a[0] += 1, sets CF on wrap
  39922. adcq $0x00, 8(%rdi) # propagate carry into a[1]
  39923. adcq $0x00, 16(%rdi) # propagate carry into a[2]
  39924. adcq $0x00, 24(%rdi) # propagate carry into a[3]; final carry is dropped
  39925. repz retq
  39926. #ifndef __APPLE__
  39927. .size sp_256_add_one_4,.-sp_256_add_one_4
  39928. #endif /* __APPLE__ */
  39929. /* Read big endian unsigned byte array into r.
  39930. * Uses the bswap instruction.
  39931. *
  * Whole 8-byte groups are taken from the end of the array (the least
  * significant bytes) and byte-swapped into successive words of r. Any 1-7
  * leading bytes left over are folded into one more word, and the remaining
  * words of r up to byte offset 32 are zeroed.
  * NOTE(review): nothing in this routine bounds n against the 32-byte output,
  * and the size argument (rsi) is never read here - confirm callers guarantee
  * n <= 32.
  *
  39932. * r A single precision integer (rdi).
  39933. * size Maximum number of bytes to convert (rsi, not read by this routine).
  39934. * a Byte array (rdx).
  39935. * n Number of bytes in array to read (rcx).
  39936. */
  39937. #ifndef __APPLE__
  39938. .text
  39939. .globl sp_256_from_bin_bswap
  39940. .type sp_256_from_bin_bswap,@function
  39941. .align 16
  39942. sp_256_from_bin_bswap:
  39943. #else
  39944. .section __TEXT,__text
  39945. .globl _sp_256_from_bin_bswap
  39946. .p2align 4
  39947. _sp_256_from_bin_bswap:
  39948. #endif /* __APPLE__ */
  39949. movq %rdx, %r9
  39950. movq %rdi, %r10
  39951. addq %rcx, %r9 # r9 = a + n: one past the last input byte
  39952. addq $32, %r10 # r10 = r + 32: end of the output words
  39953. xorq %r11, %r11 # r11 = 0, used for comparisons and zero fill
  39954. jmp L_256_from_bin_bswap_64_end
  39955. L_256_from_bin_bswap_64_start:
  39956. subq $0x40, %r9 # step back over the next 64 input bytes
  39957. movq 56(%r9), %rax
  39958. movq 48(%r9), %r8
  39959. bswapq %rax
  39960. bswapq %r8
  39961. movq %rax, (%rdi)
  39962. movq %r8, 8(%rdi)
  39963. movq 40(%r9), %rax
  39964. movq 32(%r9), %r8
  39965. bswapq %rax
  39966. bswapq %r8
  39967. movq %rax, 16(%rdi)
  39968. movq %r8, 24(%rdi)
  39969. movq 24(%r9), %rax
  39970. movq 16(%r9), %r8
  39971. bswapq %rax
  39972. bswapq %r8
  39973. movq %rax, 32(%rdi)
  39974. movq %r8, 40(%rdi)
  39975. movq 8(%r9), %rax
  39976. movq (%r9), %r8
  39977. bswapq %rax
  39978. bswapq %r8
  39979. movq %rax, 48(%rdi)
  39980. movq %r8, 56(%rdi)
  39981. addq $0x40, %rdi
  39982. subq $0x40, %rcx
  39983. L_256_from_bin_bswap_64_end:
  39984. cmpq $63, %rcx # signed compare: loop while at least 64 bytes remain
  39985. jg L_256_from_bin_bswap_64_start
  39986. jmp L_256_from_bin_bswap_8_end
  39987. L_256_from_bin_bswap_8_start:
  39988. subq $8, %r9 # step back over the next 8 input bytes
  39989. movq (%r9), %rax
  39990. bswapq %rax
  39991. movq %rax, (%rdi)
  39992. addq $8, %rdi
  39993. subq $8, %rcx
  39994. L_256_from_bin_bswap_8_end:
  39995. cmpq $7, %rcx # signed compare: loop while at least 8 bytes remain
  39996. jg L_256_from_bin_bswap_8_start
  39997. cmpq %r11, %rcx # no leftover bytes -> skip the partial word
  39998. je L_256_from_bin_bswap_hi_end
  39999. movq %r11, %r8
  40000. movq %r11, %rax
  40001. L_256_from_bin_bswap_hi_start:
  40002. movb (%rdx), %al # leftover bytes are read from the start of a
  40003. shlq $8, %r8
  40004. incq %rdx
  40005. addq %rax, %r8 # r8 = (r8 << 8) + next byte
  40006. decq %rcx
  40007. jg L_256_from_bin_bswap_hi_start
  40008. movq %r8, (%rdi)
  40009. addq $8, %rdi
  40010. L_256_from_bin_bswap_hi_end:
  40011. cmpq %r10, %rdi # output already full -> nothing to zero
  40012. je L_256_from_bin_bswap_zero_end
  40013. L_256_from_bin_bswap_zero_start:
  40014. movq %r11, (%rdi) # zero the unused high words
  40015. addq $8, %rdi
  40016. cmpq %r10, %rdi
  40017. jl L_256_from_bin_bswap_zero_start
  40018. L_256_from_bin_bswap_zero_end:
  40019. repz retq
  40020. #ifndef __APPLE__
  40021. .size sp_256_from_bin_bswap,.-sp_256_from_bin_bswap
  40022. #endif /* __APPLE__ */
  40023. /* Read big endian unsigned byte array into r.
  40024. * Uses the movbe instruction which is an optional instruction.
  40025. *
  * Whole 8-byte groups are taken from the end of the array (the least
  * significant bytes) and loaded byte-reversed into successive words of r.
  * Any 1-7 leading bytes left over are folded into one more word, and the
  * remaining words of r up to byte offset 32 are zeroed.
  * NOTE(review): nothing in this routine bounds n against the 32-byte output,
  * and the size argument (rsi) is never read here - confirm callers guarantee
  * n <= 32.
  *
  40026. * r A single precision integer (rdi).
  40027. * size Maximum number of bytes to convert (rsi, not read by this routine).
  40028. * a Byte array (rdx).
  40029. * n Number of bytes in array to read (rcx).
  40030. */
  40031. #ifndef __APPLE__
  40032. .text
  40033. .globl sp_256_from_bin_movbe
  40034. .type sp_256_from_bin_movbe,@function
  40035. .align 16
  40036. sp_256_from_bin_movbe:
  40037. #else
  40038. .section __TEXT,__text
  40039. .globl _sp_256_from_bin_movbe
  40040. .p2align 4
  40041. _sp_256_from_bin_movbe:
  40042. #endif /* __APPLE__ */
  40043. movq %rdx, %r9
  40044. movq %rdi, %r10
  40045. addq %rcx, %r9 # r9 = a + n: one past the last input byte
  40046. addq $32, %r10 # r10 = r + 32: end of the output words
  40047. xorq %r11, %r11 # r11 = 0, used for comparisons and zero fill
  40048. jmp L_256_from_bin_movbe_64_end
  40049. L_256_from_bin_movbe_64_start:
  40050. subq $0x40, %r9 # step back over the next 64 input bytes
  40051. movbeq 56(%r9), %rax
  40052. movbeq 48(%r9), %r8
  40053. movq %rax, (%rdi)
  40054. movq %r8, 8(%rdi)
  40055. movbeq 40(%r9), %rax
  40056. movbeq 32(%r9), %r8
  40057. movq %rax, 16(%rdi)
  40058. movq %r8, 24(%rdi)
  40059. movbeq 24(%r9), %rax
  40060. movbeq 16(%r9), %r8
  40061. movq %rax, 32(%rdi)
  40062. movq %r8, 40(%rdi)
  40063. movbeq 8(%r9), %rax
  40064. movbeq (%r9), %r8
  40065. movq %rax, 48(%rdi)
  40066. movq %r8, 56(%rdi)
  40067. addq $0x40, %rdi
  40068. subq $0x40, %rcx
  40069. L_256_from_bin_movbe_64_end:
  40070. cmpq $63, %rcx # signed compare: loop while at least 64 bytes remain
  40071. jg L_256_from_bin_movbe_64_start
  40072. jmp L_256_from_bin_movbe_8_end
  40073. L_256_from_bin_movbe_8_start:
  40074. subq $8, %r9 # step back over the next 8 input bytes
  40075. movbeq (%r9), %rax
  40076. movq %rax, (%rdi)
  40077. addq $8, %rdi
  40078. subq $8, %rcx
  40079. L_256_from_bin_movbe_8_end:
  40080. cmpq $7, %rcx # signed compare: loop while at least 8 bytes remain
  40081. jg L_256_from_bin_movbe_8_start
  40082. cmpq %r11, %rcx # no leftover bytes -> skip the partial word
  40083. je L_256_from_bin_movbe_hi_end
  40084. movq %r11, %r8
  40085. movq %r11, %rax
  40086. L_256_from_bin_movbe_hi_start:
  40087. movb (%rdx), %al # leftover bytes are read from the start of a
  40088. shlq $8, %r8
  40089. incq %rdx
  40090. addq %rax, %r8 # r8 = (r8 << 8) + next byte
  40091. decq %rcx
  40092. jg L_256_from_bin_movbe_hi_start
  40093. movq %r8, (%rdi)
  40094. addq $8, %rdi
  40095. L_256_from_bin_movbe_hi_end:
  40096. cmpq %r10, %rdi # output already full -> nothing to zero
  40097. je L_256_from_bin_movbe_zero_end
  40098. L_256_from_bin_movbe_zero_start:
  40099. movq %r11, (%rdi) # zero the unused high words
  40100. addq $8, %rdi
  40101. cmpq %r10, %rdi
  40102. jl L_256_from_bin_movbe_zero_start
  40103. L_256_from_bin_movbe_zero_end:
  40104. repz retq
  40105. #ifndef __APPLE__
  40106. .size sp_256_from_bin_movbe,.-sp_256_from_bin_movbe
  40107. #endif /* __APPLE__ */
  40108. /* Serialise r into a as a big endian byte string.
  40109. * Exactly 32 bytes are always written.
  40110. * Byte order is reversed with the bswap instruction.
  40111. *
  40112. * r A single precision integer (rdi), least significant word first.
  40113. * a Byte array (rsi), receives the most significant byte first.
  40114. */
  40115. #ifndef __APPLE__
  40116. .text
  40117. .globl sp_256_to_bin_bswap
  40118. .type sp_256_to_bin_bswap,@function
  40119. .align 16
  40120. sp_256_to_bin_bswap:
  40121. #else
  40122. .section __TEXT,__text
  40123. .globl _sp_256_to_bin_bswap
  40124. .p2align 4
  40125. _sp_256_to_bin_bswap:
  40126. #endif /* __APPLE__ */
  40127. movq 24(%rdi), %rax # word 3 (most significant) -> bytes 0-7
  40128. bswapq %rax
  40129. movq 16(%rdi), %rdx # word 2 -> bytes 8-15
  40130. bswapq %rdx
  40131. movq %rax, (%rsi)
  40132. movq %rdx, 8(%rsi)
  40133. movq 8(%rdi), %rax # word 1 -> bytes 16-23
  40134. bswapq %rax
  40135. movq (%rdi), %rdx # word 0 (least significant) -> bytes 24-31
  40136. bswapq %rdx
  40137. movq %rax, 16(%rsi)
  40138. movq %rdx, 24(%rsi)
  40139. repz retq
  40140. #ifndef __APPLE__
  40141. .size sp_256_to_bin_bswap,.-sp_256_to_bin_bswap
  40142. #endif /* __APPLE__ */
  40143. /* Serialise r into a as a big endian byte string.
  40144. * Exactly 32 bytes are always written.
  40145. * Byte order is reversed by loading with movbe, an optional instruction.
  40146. *
  40147. * r A single precision integer (rdi), least significant word first.
  40148. * a Byte array (rsi), receives the most significant byte first.
  40149. */
  40150. #ifndef __APPLE__
  40151. .text
  40152. .globl sp_256_to_bin_movbe
  40153. .type sp_256_to_bin_movbe,@function
  40154. .align 16
  40155. sp_256_to_bin_movbe:
  40156. #else
  40157. .section __TEXT,__text
  40158. .globl _sp_256_to_bin_movbe
  40159. .p2align 4
  40160. _sp_256_to_bin_movbe:
  40161. #endif /* __APPLE__ */
  40162. movbeq 16(%rdi), %rdx # word 2, byte-reversed -> bytes 8-15
  40163. movbeq 24(%rdi), %rax # word 3 (most significant), byte-reversed -> bytes 0-7
  40164. movq %rax, (%rsi)
  40165. movq %rdx, 8(%rsi)
  40166. movbeq (%rdi), %rdx # word 0 (least significant), byte-reversed -> bytes 24-31
  40167. movbeq 8(%rdi), %rax # word 1, byte-reversed -> bytes 16-23
  40168. movq %rax, 16(%rsi)
  40169. movq %rdx, 24(%rsi)
  40170. repz retq
  40171. #ifndef __APPLE__
  40172. .size sp_256_to_bin_movbe,.-sp_256_to_bin_movbe
  40173. #endif /* __APPLE__ */
  40174. /* Four-word addition with carry out. (r = a + b)
  40175. *
  * Returns the carry out of the top word (0 or 1) in rax.
  *
  40176. * r A single precision integer (rdi), receives the sum.
  40177. * a A single precision integer (rsi).
  40178. * b A single precision integer (rdx).
  40179. */
  40180. #ifndef __APPLE__
  40181. .text
  40182. .globl sp_256_add_4
  40183. .type sp_256_add_4,@function
  40184. .align 16
  40185. sp_256_add_4:
  40186. #else
  40187. .section __TEXT,__text
  40188. .globl _sp_256_add_4
  40189. .p2align 4
  40190. _sp_256_add_4:
  40191. #endif /* __APPLE__ */
  40192. # Word-by-word add; each store is delayed one step behind its add
  40193. xorq %rax, %rax # carry accumulator, cleared before the chain starts
  40194. movq (%rsi), %r8
  40195. addq (%rdx), %r8 # word 0, starts the carry chain
  40196. movq 8(%rsi), %rcx
  40197. movq %r8, (%rdi)
  40198. adcq 8(%rdx), %rcx # word 1
  40199. movq 16(%rsi), %r8
  40200. movq %rcx, 8(%rdi)
  40201. adcq 16(%rdx), %r8 # word 2
  40202. movq 24(%rsi), %rcx
  40203. movq %r8, 16(%rdi)
  40204. adcq 24(%rdx), %rcx # word 3
  40205. movq %rcx, 24(%rdi)
  40206. adcq $0x00, %rax # rax = final carry
  40207. repz retq
  40208. #ifndef __APPLE__
  40209. .size sp_256_add_4,.-sp_256_add_4
  40210. #endif /* __APPLE__ */
  40211. /* Multiply a and b into r. (r = a * b)
  40212. *
  * Column-by-column schoolbook multiply of two four-word values into an
  * eight-word product. r8, r9 and r10 rotate as a three-word column
  * accumulator. The low four result words are staged in 32 bytes of stack and
  * copied to r only after every input word has been read; the high four words
  * are written to r directly.
  *
  40213. * r A single precision integer (rdi), eight 64-bit words are written.
  40214. * a A single precision integer (rsi), four words.
  40215. * b A single precision integer (rdx on entry, kept in rcx), four words.
  40216. */
  40217. #ifndef __APPLE__
  40218. .text
  40219. .globl sp_256_mul_4
  40220. .type sp_256_mul_4,@function
  40221. .align 16
  40222. sp_256_mul_4:
  40223. #else
  40224. .section __TEXT,__text
  40225. .globl _sp_256_mul_4
  40226. .p2align 4
  40227. _sp_256_mul_4:
  40228. #endif /* __APPLE__ */
  40229. movq %rdx, %rcx # keep b in rcx: mulq overwrites rdx
  40230. subq $32, %rsp # scratch for result words 0-3
  40231. # A[0] * B[0]
  40232. movq (%rcx), %rax
  40233. mulq (%rsi)
  40234. xorq %r10, %r10
  40235. movq %rax, (%rsp) # result word 0
  40236. movq %rdx, %r9
  40237. # A[0] * B[1]
  40238. movq 8(%rcx), %rax
  40239. mulq (%rsi)
  40240. xorq %r8, %r8
  40241. addq %rax, %r9
  40242. adcq %rdx, %r10
  40243. adcq $0x00, %r8
  40244. # A[1] * B[0]
  40245. movq (%rcx), %rax
  40246. mulq 8(%rsi)
  40247. addq %rax, %r9
  40248. adcq %rdx, %r10
  40249. adcq $0x00, %r8
  40250. movq %r9, 8(%rsp) # result word 1
  40251. # A[0] * B[2]
  40252. movq 16(%rcx), %rax
  40253. mulq (%rsi)
  40254. xorq %r9, %r9
  40255. addq %rax, %r10
  40256. adcq %rdx, %r8
  40257. adcq $0x00, %r9
  40258. # A[1] * B[1]
  40259. movq 8(%rcx), %rax
  40260. mulq 8(%rsi)
  40261. addq %rax, %r10
  40262. adcq %rdx, %r8
  40263. adcq $0x00, %r9
  40264. # A[2] * B[0]
  40265. movq (%rcx), %rax
  40266. mulq 16(%rsi)
  40267. addq %rax, %r10
  40268. adcq %rdx, %r8
  40269. adcq $0x00, %r9
  40270. movq %r10, 16(%rsp) # result word 2
  40271. # A[0] * B[3]
  40272. movq 24(%rcx), %rax
  40273. mulq (%rsi)
  40274. xorq %r10, %r10
  40275. addq %rax, %r8
  40276. adcq %rdx, %r9
  40277. adcq $0x00, %r10
  40278. # A[1] * B[2]
  40279. movq 16(%rcx), %rax
  40280. mulq 8(%rsi)
  40281. addq %rax, %r8
  40282. adcq %rdx, %r9
  40283. adcq $0x00, %r10
  40284. # A[2] * B[1]
  40285. movq 8(%rcx), %rax
  40286. mulq 16(%rsi)
  40287. addq %rax, %r8
  40288. adcq %rdx, %r9
  40289. adcq $0x00, %r10
  40290. # A[3] * B[0]
  40291. movq (%rcx), %rax
  40292. mulq 24(%rsi)
  40293. addq %rax, %r8
  40294. adcq %rdx, %r9
  40295. adcq $0x00, %r10
  40296. movq %r8, 24(%rsp) # result word 3
  40297. # A[1] * B[3]
  40298. movq 24(%rcx), %rax
  40299. mulq 8(%rsi)
  40300. xorq %r8, %r8
  40301. addq %rax, %r9
  40302. adcq %rdx, %r10
  40303. adcq $0x00, %r8
  40304. # A[2] * B[2]
  40305. movq 16(%rcx), %rax
  40306. mulq 16(%rsi)
  40307. addq %rax, %r9
  40308. adcq %rdx, %r10
  40309. adcq $0x00, %r8
  40310. # A[3] * B[1]
  40311. movq 8(%rcx), %rax
  40312. mulq 24(%rsi)
  40313. addq %rax, %r9
  40314. adcq %rdx, %r10
  40315. adcq $0x00, %r8
  40316. movq %r9, 32(%rdi) # result word 4, written straight to r
  40317. # A[2] * B[3]
  40318. movq 24(%rcx), %rax
  40319. mulq 16(%rsi)
  40320. xorq %r9, %r9
  40321. addq %rax, %r10
  40322. adcq %rdx, %r8
  40323. adcq $0x00, %r9
  40324. # A[3] * B[2]
  40325. movq 16(%rcx), %rax
  40326. mulq 24(%rsi)
  40327. addq %rax, %r10
  40328. adcq %rdx, %r8
  40329. adcq $0x00, %r9
  40330. movq %r10, 40(%rdi) # result word 5
  40331. # A[3] * B[3]
  40332. movq 24(%rcx), %rax
  40333. mulq 24(%rsi)
  40334. addq %rax, %r8
  40335. adcq %rdx, %r9
  40336. movq %r8, 48(%rdi) # result word 6
  40337. movq %r9, 56(%rdi) # result word 7
  40338. movq (%rsp), %rax # all inputs consumed: copy staged words 0-3 into r
  40339. movq 8(%rsp), %rdx
  40340. movq 16(%rsp), %r8
  40341. movq 24(%rsp), %r9
  40342. movq %rax, (%rdi)
  40343. movq %rdx, 8(%rdi)
  40344. movq %r8, 16(%rdi)
  40345. movq %r9, 24(%rdi)
  40346. addq $32, %rsp
  40347. repz retq
  40348. #ifndef __APPLE__
  40349. .size sp_256_mul_4,.-sp_256_mul_4
  40350. #endif /* __APPLE__ */
  40351. /* Multiply a and b into r. (r = a * b)
  40352. *
  * Uses mulx with two interleaved carry chains: adcx carries through CF and
  * adox carries through OF. Each xorq of a register with itself zeroes that
  * register and clears both flags, starting a fresh pair of chains.
  * The eight product words are held in r8-r15 and stored to r only at the
  * end, after all reads of a and b.
  * Callee-saved rbx, rbp and r12-r15 are pushed on entry and popped on exit.
  *
  40353. * r Result of multiplication (rdi), eight 64-bit words are written.
  40354. * a First number to multiply (rsi), four words.
  40355. * b Second number to multiply (rdx on entry, kept in rbp), four words.
  40356. */
  40357. #ifndef __APPLE__
  40358. .text
  40359. .globl sp_256_mul_avx2_4
  40360. .type sp_256_mul_avx2_4,@function
  40361. .align 16
  40362. sp_256_mul_avx2_4:
  40363. #else
  40364. .section __TEXT,__text
  40365. .globl _sp_256_mul_avx2_4
  40366. .p2align 4
  40367. _sp_256_mul_avx2_4:
  40368. #endif /* __APPLE__ */
  40369. pushq %rbx
  40370. pushq %rbp
  40371. pushq %r12
  40372. pushq %r13
  40373. pushq %r14
  40374. pushq %r15
  40375. movq %rdx, %rbp # keep b in rbp: rdx is the implicit mulx operand
  40376. # A[0] * B[0]
  40377. movq (%rbp), %rdx
  40378. mulxq (%rsi), %r8, %r9
  40379. # A[2] * B[0]
  40380. mulxq 16(%rsi), %r10, %r11
  40381. # A[1] * B[0]
  40382. mulxq 8(%rsi), %rax, %rcx
  40383. xorq %r15, %r15 # r15 = 0, CF = OF = 0
  40384. adcxq %rax, %r9
  40385. # A[1] * B[3]
  40386. movq 24(%rbp), %rdx
  40387. mulxq 8(%rsi), %r12, %r13
  40388. adcxq %rcx, %r10
  40389. # A[0] * B[1]
  40390. movq 8(%rbp), %rdx
  40391. mulxq (%rsi), %rax, %rcx
  40392. adoxq %rax, %r9
  40393. # A[2] * B[1]
  40394. mulxq 16(%rsi), %rax, %r14
  40395. adoxq %rcx, %r10
  40396. adcxq %rax, %r11
  40397. # A[1] * B[2]
  40398. movq 16(%rbp), %rdx
  40399. mulxq 8(%rsi), %rax, %rcx
  40400. adcxq %r14, %r12
  40401. adoxq %rax, %r11
  40402. adcxq %r15, %r13
  40403. adoxq %rcx, %r12
  40404. # A[0] * B[2]
  40405. mulxq (%rsi), %rax, %rcx
  40406. adoxq %r15, %r13
  40407. xorq %r14, %r14 # r14 = 0, restart both carry chains
  40408. adcxq %rax, %r10
  40409. # A[1] * B[1]
  40410. movq 8(%rbp), %rdx
  40411. mulxq 8(%rsi), %rdx, %rax
  40412. adcxq %rcx, %r11
  40413. adoxq %rdx, %r10
  40414. # A[3] * B[1]
  40415. movq 8(%rbp), %rdx
  40416. adoxq %rax, %r11
  40417. mulxq 24(%rsi), %rax, %rcx
  40418. adcxq %rax, %r12
  40419. # A[2] * B[2]
  40420. movq 16(%rbp), %rdx
  40421. mulxq 16(%rsi), %rdx, %rax
  40422. adcxq %rcx, %r13
  40423. adoxq %rdx, %r12
  40424. # A[3] * B[3]
  40425. movq 24(%rbp), %rdx
  40426. adoxq %rax, %r13
  40427. mulxq 24(%rsi), %rax, %rcx
  40428. adoxq %r15, %r14
  40429. adcxq %rax, %r14
  40430. # A[0] * B[3]
  40431. mulxq (%rsi), %rdx, %rax
  40432. adcxq %rcx, %r15
  40433. xorq %rcx, %rcx # rcx = 0, restart both carry chains
  40434. adcxq %rdx, %r11
  40435. # A[3] * B[0]
  40436. movq 24(%rsi), %rdx # operands swapped here: rdx = a[3], memory operands come from b
  40437. adcxq %rax, %r12
  40438. mulxq (%rbp), %rbx, %rax
  40439. adoxq %rbx, %r11
  40440. adoxq %rax, %r12
  40441. # A[3] * B[2]
  40442. mulxq 16(%rbp), %rdx, %rax
  40443. adcxq %rdx, %r13
  40444. # A[2] * B[3]
  40445. movq 24(%rbp), %rdx
  40446. adcxq %rax, %r14
  40447. mulxq 16(%rsi), %rax, %rdx
  40448. adcxq %rcx, %r15
  40449. adoxq %rax, %r13
  40450. adoxq %rdx, %r14
  40451. adoxq %rcx, %r15
  40452. movq %r8, (%rdi) # product words 0-7 live in r8-r15
  40453. movq %r9, 8(%rdi)
  40454. movq %r10, 16(%rdi)
  40455. movq %r11, 24(%rdi)
  40456. movq %r12, 32(%rdi)
  40457. movq %r13, 40(%rdi)
  40458. movq %r14, 48(%rdi)
  40459. movq %r15, 56(%rdi)
  40460. popq %r15
  40461. popq %r14
  40462. popq %r13
  40463. popq %r12
  40464. popq %rbp
  40465. popq %rbx
  40466. repz retq
  40467. #ifndef __APPLE__
  40468. .size sp_256_mul_avx2_4,.-sp_256_mul_avx2_4
  40469. #endif /* __APPLE__ */
  40470. /* In-place four-word subtraction with borrow out. (a -= b)
  40471. *
  * Returns 0 in rax when there is no borrow out of the top word and all ones
  * (-1) when there is.
  *
  40472. * a A single precision integer and result (rdi).
  40473. * b A single precision integer (rsi).
  40474. */
  40475. #ifndef __APPLE__
  40476. .text
  40477. .globl sp_256_sub_in_place_4
  40478. .type sp_256_sub_in_place_4,@function
  40479. .align 16
  40480. sp_256_sub_in_place_4:
  40481. #else
  40482. .section __TEXT,__text
  40483. .globl _sp_256_sub_in_place_4
  40484. .p2align 4
  40485. _sp_256_sub_in_place_4:
  40486. #endif /* __APPLE__ */
  40487. movq (%rsi), %r8 # fetch all of b before a is modified
  40488. movq 8(%rsi), %r9
  40489. movq 16(%rsi), %rcx
  40490. movq 24(%rsi), %rdx
  40491. xorq %rax, %rax # borrow accumulator, cleared before the chain starts
  40492. subq %r8, (%rdi) # word 0, starts the borrow chain
  40493. sbbq %r9, 8(%rdi)
  40494. sbbq %rcx, 16(%rdi)
  40495. sbbq %rdx, 24(%rdi)
  40496. sbbq $0x00, %rax # rax = 0 - borrow
  40497. repz retq
  40498. #ifndef __APPLE__
  40499. .size sp_256_sub_in_place_4,.-sp_256_sub_in_place_4
  40500. #endif /* __APPLE__ */
  40501. /* Masked four-word subtraction. (r = a - (b & m))
  40502. * m is -1 to subtract and 0 when not copying.
  40503. *
  * The same instructions execute for either mask value. Returns 0 in rax when
  * there is no borrow out of the top word and all ones (-1) when there is.
  *
  40504. * r A single precision number representing condition subtract result (rdi).
  40505. * a A single precision number to subtract from (rsi).
  40506. * b A single precision number to subtract (rdx).
  40507. * m Mask value to apply (rcx).
  40508. */
  40509. #ifndef __APPLE__
  40510. .text
  40511. .globl sp_256_cond_sub_avx2_4
  40512. .type sp_256_cond_sub_avx2_4,@function
  40513. .align 16
  40514. sp_256_cond_sub_avx2_4:
  40515. #else
  40516. .section __TEXT,__text
  40517. .globl _sp_256_cond_sub_avx2_4
  40518. .p2align 4
  40519. _sp_256_cond_sub_avx2_4:
  40520. #endif /* __APPLE__ */
  40521. pushq %r12
  40522. pushq %r13
  40523. pushq %r14
  40524. pushq %r15
  40525. movq (%rdx), %r8 # r8-r11 = b & m, one word at a time
  40526. andq %rcx, %r8
  40527. movq 8(%rdx), %r9
  40528. andq %rcx, %r9
  40529. movq 16(%rdx), %r10
  40530. andq %rcx, %r10
  40531. movq 24(%rdx), %r11
  40532. andq %rcx, %r11
  40533. movq (%rsi), %r12 # r12-r15 = a
  40534. movq 8(%rsi), %r13
  40535. movq 16(%rsi), %r14
  40536. movq 24(%rsi), %r15
  40537. subq %r8, %r12 # word 0, starts the borrow chain
  40538. sbbq %r9, %r13
  40539. sbbq %r10, %r14
  40540. sbbq %r11, %r15
  40541. movq %r12, (%rdi)
  40542. movq %r13, 8(%rdi)
  40543. movq %r14, 16(%rdi)
  40544. movq %r15, 24(%rdi)
  40545. movq $0x00, %rax # mov rather than xor: the borrow in CF must survive
  40546. sbbq $0x00, %rax # rax = 0 - borrow
  40547. popq %r15
  40548. popq %r14
  40549. popq %r13
  40550. popq %r12
  40551. repz retq
  40552. #ifndef __APPLE__
  40553. .size sp_256_cond_sub_avx2_4,.-sp_256_cond_sub_avx2_4
  40554. #endif /* __APPLE__ */
  40555. /* Mul a by digit b into r. (r = a * b)
  40556. *
  * Multiplies each of the four words of a by the single word b and writes a
  * five-word result. r8, r9 and r10 rotate as the running low/high/carry words.
  *
  40557. * r A single precision integer (rdi), five 64-bit words are written.
  40558. * a A single precision integer (rsi), four words.
  40559. * b A single precision digit (rdx on entry, kept in rcx).
  40560. */
  40561. #ifndef __APPLE__
  40562. .text
  40563. .globl sp_256_mul_d_4
  40564. .type sp_256_mul_d_4,@function
  40565. .align 16
  40566. sp_256_mul_d_4:
  40567. #else
  40568. .section __TEXT,__text
  40569. .globl _sp_256_mul_d_4
  40570. .p2align 4
  40571. _sp_256_mul_d_4:
  40572. #endif /* __APPLE__ */
  40573. movq %rdx, %rcx # keep b in rcx: mulq overwrites rdx
  40574. # A[0] * B
  40575. movq %rcx, %rax
  40576. xorq %r10, %r10
  40577. mulq (%rsi)
  40578. movq %rax, %r8
  40579. movq %rdx, %r9
  40580. movq %r8, (%rdi) # result word 0
  40581. # A[1] * B
  40582. movq %rcx, %rax
  40583. xorq %r8, %r8
  40584. mulq 8(%rsi)
  40585. addq %rax, %r9
  40586. movq %r9, 8(%rdi) # result word 1 (mov leaves the carry flag intact)
  40587. adcq %rdx, %r10
  40588. adcq $0x00, %r8
  40589. # A[2] * B
  40590. movq %rcx, %rax
  40591. xorq %r9, %r9
  40592. mulq 16(%rsi)
  40593. addq %rax, %r10
  40594. movq %r10, 16(%rdi) # result word 2
  40595. adcq %rdx, %r8
  40596. adcq $0x00, %r9
  40597. # A[3] * B
  40598. movq %rcx, %rax
  40599. mulq 24(%rsi)
  40600. addq %rax, %r8
  40601. adcq %rdx, %r9
  40602. movq %r8, 24(%rdi) # result word 3
  40603. movq %r9, 32(%rdi) # result word 4 (top word)
  40604. repz retq
  40605. #ifndef __APPLE__
  40606. .size sp_256_mul_d_4,.-sp_256_mul_d_4
  40607. #endif /* __APPLE__ */
  40608. #ifdef HAVE_INTEL_AVX2
  40609. /* Mul a by digit b into r. (r = a * b)
  40610. *
  * mulx variant: the low halves of the partial products are accumulated
  * through CF with adcx and the high halves through OF with adox. r11 is
  * kept at zero and used to seed each new high word.
  *
  40611. * r A single precision integer (rdi), five 64-bit words are written.
  40612. * a A single precision integer (rsi), four words.
  40613. * b A single precision digit (rdx).
  40614. */
  40615. #ifndef __APPLE__
  40616. .text
  40617. .globl sp_256_mul_d_avx2_4
  40618. .type sp_256_mul_d_avx2_4,@function
  40619. .align 16
  40620. sp_256_mul_d_avx2_4:
  40621. #else
  40622. .section __TEXT,__text
  40623. .globl _sp_256_mul_d_avx2_4
  40624. .p2align 4
  40625. _sp_256_mul_d_avx2_4:
  40626. #endif /* __APPLE__ */
  40627. movq %rdx, %rax
  40628. # A[0] * B
  40629. movq %rax, %rdx # rdx = b, the implicit mulx operand
  40630. xorq %r11, %r11 # r11 = 0, CF = OF = 0
  40631. mulxq (%rsi), %r9, %r10
  40632. movq %r9, (%rdi) # result word 0
  40633. # A[1] * B
  40634. mulxq 8(%rsi), %rcx, %r8
  40635. movq %r11, %r9
  40636. adcxq %rcx, %r10
  40637. movq %r10, 8(%rdi) # result word 1
  40638. adoxq %r8, %r9
  40639. # A[2] * B
  40640. mulxq 16(%rsi), %rcx, %r8
  40641. movq %r11, %r10
  40642. adcxq %rcx, %r9
  40643. movq %r9, 16(%rdi) # result word 2
  40644. adoxq %r8, %r10
  40645. # A[3] * B
  40646. mulxq 24(%rsi), %rcx, %r8
  40647. movq %r11, %r9
  40648. adcxq %rcx, %r10
  40649. adoxq %r8, %r9
  40650. adcxq %r11, %r9 # fold the last CF carry into the top word
  40651. movq %r10, 24(%rdi) # result word 3
  40652. movq %r9, 32(%rdi) # result word 4 (top word)
  40653. repz retq
  40654. #ifndef __APPLE__
  40655. .size sp_256_mul_d_avx2_4,.-sp_256_mul_d_avx2_4
  40656. #endif /* __APPLE__ */
  40657. #endif /* HAVE_INTEL_AVX2 */
  40658. /* Square a and put result in r. (r = a * a)
  40659. *
  * Column-by-column squaring: every cross product A[i] * A[j] with i != j is
  * computed once and added into the column twice, and the diagonal products
  * A[i] * A[i] are added once. rcx, r8 and r9 rotate as a three-word column
  * accumulator. The low four result words are staged in 32 bytes of stack and
  * copied to r after all reads of a; the high four are written to r directly.
  * NOTE(review): r12 is pushed and popped but is not otherwise referenced in
  * this routine.
  *
  40660. * r A single precision integer (rdi), eight 64-bit words are written.
  40661. * a A single precision integer (rsi), four words.
  40662. */
  40663. #ifndef __APPLE__
  40664. .text
  40665. .globl sp_256_sqr_4
  40666. .type sp_256_sqr_4,@function
  40667. .align 16
  40668. sp_256_sqr_4:
  40669. #else
  40670. .section __TEXT,__text
  40671. .globl _sp_256_sqr_4
  40672. .p2align 4
  40673. _sp_256_sqr_4:
  40674. #endif /* __APPLE__ */
  40675. pushq %r12
  40676. subq $32, %rsp # scratch for result words 0-3
  40677. # A[0] * A[0]
  40678. movq (%rsi), %rax
  40679. mulq %rax
  40680. xorq %r9, %r9
  40681. movq %rax, (%rsp) # result word 0
  40682. movq %rdx, %r8
  40683. # A[0] * A[1]
  40684. movq 8(%rsi), %rax
  40685. mulq (%rsi)
  40686. xorq %rcx, %rcx
  40687. addq %rax, %r8
  40688. adcq %rdx, %r9
  40689. adcq $0x00, %rcx
  40690. addq %rax, %r8 # cross product added a second time
  40691. adcq %rdx, %r9
  40692. adcq $0x00, %rcx
  40693. movq %r8, 8(%rsp) # result word 1
  40694. # A[0] * A[2]
  40695. movq 16(%rsi), %rax
  40696. mulq (%rsi)
  40697. xorq %r8, %r8
  40698. addq %rax, %r9
  40699. adcq %rdx, %rcx
  40700. adcq $0x00, %r8
  40701. addq %rax, %r9
  40702. adcq %rdx, %rcx
  40703. adcq $0x00, %r8
  40704. # A[1] * A[1]
  40705. movq 8(%rsi), %rax
  40706. mulq %rax
  40707. addq %rax, %r9
  40708. adcq %rdx, %rcx
  40709. adcq $0x00, %r8
  40710. movq %r9, 16(%rsp) # result word 2
  40711. # A[0] * A[3]
  40712. movq 24(%rsi), %rax
  40713. mulq (%rsi)
  40714. xorq %r9, %r9
  40715. addq %rax, %rcx
  40716. adcq %rdx, %r8
  40717. adcq $0x00, %r9
  40718. addq %rax, %rcx
  40719. adcq %rdx, %r8
  40720. adcq $0x00, %r9
  40721. # A[1] * A[2]
  40722. movq 16(%rsi), %rax
  40723. mulq 8(%rsi)
  40724. addq %rax, %rcx
  40725. adcq %rdx, %r8
  40726. adcq $0x00, %r9
  40727. addq %rax, %rcx
  40728. adcq %rdx, %r8
  40729. adcq $0x00, %r9
  40730. movq %rcx, 24(%rsp) # result word 3
  40731. # A[1] * A[3]
  40732. movq 24(%rsi), %rax
  40733. mulq 8(%rsi)
  40734. xorq %rcx, %rcx
  40735. addq %rax, %r8
  40736. adcq %rdx, %r9
  40737. adcq $0x00, %rcx
  40738. addq %rax, %r8
  40739. adcq %rdx, %r9
  40740. adcq $0x00, %rcx
  40741. # A[2] * A[2]
  40742. movq 16(%rsi), %rax
  40743. mulq %rax
  40744. addq %rax, %r8
  40745. adcq %rdx, %r9
  40746. adcq $0x00, %rcx
  40747. movq %r8, 32(%rdi) # result word 4, written straight to r
  40748. # A[2] * A[3]
  40749. movq 24(%rsi), %rax
  40750. mulq 16(%rsi)
  40751. xorq %r8, %r8
  40752. addq %rax, %r9
  40753. adcq %rdx, %rcx
  40754. adcq $0x00, %r8
  40755. addq %rax, %r9
  40756. adcq %rdx, %rcx
  40757. adcq $0x00, %r8
  40758. movq %r9, 40(%rdi) # result word 5
  40759. # A[3] * A[3]
  40760. movq 24(%rsi), %rax
  40761. mulq %rax
  40762. addq %rax, %rcx
  40763. adcq %rdx, %r8
  40764. movq %rcx, 48(%rdi) # result word 6
  40765. movq %r8, 56(%rdi) # result word 7
  40766. movq (%rsp), %rax # all inputs consumed: copy staged words 0-3 into r
  40767. movq 8(%rsp), %rdx
  40768. movq 16(%rsp), %r10
  40769. movq 24(%rsp), %r11
  40770. movq %rax, (%rdi)
  40771. movq %rdx, 8(%rdi)
  40772. movq %r10, 16(%rdi)
  40773. movq %r11, 24(%rdi)
  40774. addq $32, %rsp
  40775. popq %r12
  40776. repz retq
  40777. #ifndef __APPLE__
  40778. .size sp_256_sqr_4,.-sp_256_sqr_4
  40779. #endif /* __APPLE__ */
  40780. /* Multiply two Montgomery form numbers mod the modulus hard-coded below.
  40781. * (r = a * b mod m)
  40782. *
  * Stage 1 forms the eight-word product in r8-r15 with mulx and the adcx/adox
  * carry chains. Stage 2 runs four reduction rounds: each round multiplies the
  * current lowest word by the constant in rbx to get a factor, then adds
  * factor * modulus so that the lowest word cancels. Stage 3 subtracts the
  * modulus, masked by the final carry, from the upper four words and stores
  * them to r.
  * The modulus words, least significant first, are 0xf3b9cac2fc632551,
  * 0xbce6faada7179e84, 0xffffffffffffffff and 0xffffffff00000000.
  * NOTE(review): these words match the NIST P-256 group order, which agrees
  * with "order" in the routine name - confirm.
  * Callee-saved rbx, rbp and r12-r15 are pushed on entry and popped on exit.
  *
  40783. * r Result of multiplication (rdi), four 64-bit words are written.
  40784. * a First number to multiply in Montgomery form (rsi), four words.
  40785. * b Second number to multiply in Montgomery form (rdx on entry, kept in rbp), four words.
  40786. */
  40787. #ifndef __APPLE__
  40788. .text
  40789. .globl sp_256_mont_mul_order_avx2_4
  40790. .type sp_256_mont_mul_order_avx2_4,@function
  40791. .align 16
  40792. sp_256_mont_mul_order_avx2_4:
  40793. #else
  40794. .section __TEXT,__text
  40795. .globl _sp_256_mont_mul_order_avx2_4
  40796. .p2align 4
  40797. _sp_256_mont_mul_order_avx2_4:
  40798. #endif /* __APPLE__ */
  40799. pushq %rbx
  40800. pushq %rbp
  40801. pushq %r12
  40802. pushq %r13
  40803. pushq %r14
  40804. pushq %r15
  40805. movq %rdx, %rbp # keep b in rbp: rdx is the implicit mulx operand
  40806. # A[0] * B[0]
  40807. movq (%rbp), %rdx
  40808. mulxq (%rsi), %r8, %r9
  40809. # A[2] * B[0]
  40810. mulxq 16(%rsi), %r10, %r11
  40811. # A[1] * B[0]
  40812. mulxq 8(%rsi), %rax, %rcx
  40813. xorq %r15, %r15 # r15 = 0, CF = OF = 0
  40814. adcxq %rax, %r9
  40815. # A[1] * B[3]
  40816. movq 24(%rbp), %rdx
  40817. mulxq 8(%rsi), %r12, %r13
  40818. adcxq %rcx, %r10
  40819. # A[0] * B[1]
  40820. movq 8(%rbp), %rdx
  40821. mulxq (%rsi), %rax, %rcx
  40822. adoxq %rax, %r9
  40823. # A[2] * B[1]
  40824. mulxq 16(%rsi), %rax, %r14
  40825. adoxq %rcx, %r10
  40826. adcxq %rax, %r11
  40827. # A[1] * B[2]
  40828. movq 16(%rbp), %rdx
  40829. mulxq 8(%rsi), %rax, %rcx
  40830. adcxq %r14, %r12
  40831. adoxq %rax, %r11
  40832. adcxq %r15, %r13
  40833. adoxq %rcx, %r12
  40834. # A[0] * B[2]
  40835. mulxq (%rsi), %rax, %rcx
  40836. adoxq %r15, %r13
  40837. xorq %r14, %r14 # r14 = 0, restart both carry chains
  40838. adcxq %rax, %r10
  40839. # A[1] * B[1]
  40840. movq 8(%rbp), %rdx
  40841. mulxq 8(%rsi), %rdx, %rax
  40842. adcxq %rcx, %r11
  40843. adoxq %rdx, %r10
  40844. # A[3] * B[1]
  40845. movq 8(%rbp), %rdx
  40846. adoxq %rax, %r11
  40847. mulxq 24(%rsi), %rax, %rcx
  40848. adcxq %rax, %r12
  40849. # A[2] * B[2]
  40850. movq 16(%rbp), %rdx
  40851. mulxq 16(%rsi), %rdx, %rax
  40852. adcxq %rcx, %r13
  40853. adoxq %rdx, %r12
  40854. # A[3] * B[3]
  40855. movq 24(%rbp), %rdx
  40856. adoxq %rax, %r13
  40857. mulxq 24(%rsi), %rax, %rcx
  40858. adoxq %r15, %r14
  40859. adcxq %rax, %r14
  40860. # A[0] * B[3]
  40861. mulxq (%rsi), %rdx, %rax
  40862. adcxq %rcx, %r15
  40863. xorq %rcx, %rcx # rcx = 0, restart both carry chains
  40864. adcxq %rdx, %r11
  40865. # A[3] * B[0]
  40866. movq 24(%rsi), %rdx # operands swapped here: rdx = a[3], memory operands come from b
  40867. adcxq %rax, %r12
  40868. mulxq (%rbp), %rbx, %rax
  40869. adoxq %rbx, %r11
  40870. adoxq %rax, %r12
  40871. # A[3] * B[2]
  40872. mulxq 16(%rbp), %rdx, %rax
  40873. adcxq %rdx, %r13
  40874. # A[2] * B[3]
  40875. movq 24(%rbp), %rdx
  40876. adcxq %rax, %r14
  40877. mulxq 16(%rsi), %rax, %rdx
  40878. adcxq %rcx, %r15
  40879. adoxq %rax, %r13
  40880. adoxq %rdx, %r14
  40881. adoxq %rcx, %r15
  40882. # Start Reduction
  40883. movq $0xccd1c8aaee00bc4f, %rbx # per-round multiplier, kept in rbx for all four rounds
  40884. movq %rbx, %rdx
  40885. imulq %r8, %rdx # round 1 factor = low 64 bits of r8 * rbx
  40886. movq $0xf3b9cac2fc632551, %rax
  40887. xorq %rbp, %rbp # rbp = 0, CF = OF = 0 (b is no longer needed)
  40888. mulxq %rax, %rcx, %rsi # rsi is reused as scratch from here on
  40889. movq $0xbce6faada7179e84, %rax
  40890. adcxq %rcx, %r8
  40891. adoxq %rsi, %r9
  40892. mulxq %rax, %rcx, %rsi
  40893. movq $0xffffffffffffffff, %rax
  40894. adcxq %rcx, %r9
  40895. adoxq %rsi, %r10
  40896. mulxq %rax, %rcx, %rsi
  40897. movq $0xffffffff00000000, %rax
  40898. adcxq %rcx, %r10
  40899. adoxq %rsi, %r11
  40900. mulxq %rax, %rcx, %rsi
  40901. adcxq %rcx, %r11
  40902. adoxq %rsi, %r12
  40903. adcxq %rbp, %r12
  40904. movq %rbp, %r8 # r8 is free now: reuse it to hold the carry out of this round
  40905. # carry
  40906. adoxq %rbp, %r8
  40907. adcxq %rbp, %r8
  40908. movq %rbx, %rdx
  40909. imulq %r9, %rdx # round 2 factor = low 64 bits of r9 * rbx
  40910. movq $0xf3b9cac2fc632551, %rax
  40911. xorq %rbp, %rbp
  40912. mulxq %rax, %rcx, %rsi
  40913. movq $0xbce6faada7179e84, %rax
  40914. adcxq %rcx, %r9
  40915. adoxq %rsi, %r10
  40916. mulxq %rax, %rcx, %rsi
  40917. movq $0xffffffffffffffff, %rax
  40918. adcxq %rcx, %r10
  40919. adoxq %rsi, %r11
  40920. mulxq %rax, %rcx, %rsi
  40921. movq $0xffffffff00000000, %rax
  40922. adcxq %rcx, %r11
  40923. adoxq %rsi, %r12
  40924. mulxq %rax, %rcx, %rsi
  40925. adcxq %rcx, %r12
  40926. adoxq %rsi, %r13
  40927. adcxq %r8, %r13 # add the carry saved by the previous round
  40928. movq %rbp, %r8
  40929. # carry
  40930. adoxq %rbp, %r8
  40931. adcxq %rbp, %r8
  40932. movq %rbx, %rdx
  40933. imulq %r10, %rdx # round 3 factor = low 64 bits of r10 * rbx
  40934. movq $0xf3b9cac2fc632551, %rax
  40935. xorq %rbp, %rbp
  40936. mulxq %rax, %rcx, %rsi
  40937. movq $0xbce6faada7179e84, %rax
  40938. adcxq %rcx, %r10
  40939. adoxq %rsi, %r11
  40940. mulxq %rax, %rcx, %rsi
  40941. movq $0xffffffffffffffff, %rax
  40942. adcxq %rcx, %r11
  40943. adoxq %rsi, %r12
  40944. mulxq %rax, %rcx, %rsi
  40945. movq $0xffffffff00000000, %rax
  40946. adcxq %rcx, %r12
  40947. adoxq %rsi, %r13
  40948. mulxq %rax, %rcx, %rsi
  40949. adcxq %rcx, %r13
  40950. adoxq %rsi, %r14
  40951. adcxq %r8, %r14 # add the carry saved by the previous round
  40952. movq %rbp, %r8
  40953. # carry
  40954. adoxq %rbp, %r8
  40955. adcxq %rbp, %r8
  40956. movq %rbx, %rdx
  40957. imulq %r11, %rdx # round 4 factor = low 64 bits of r11 * rbx
  40958. movq $0xf3b9cac2fc632551, %rax
  40959. xorq %rbp, %rbp
  40960. mulxq %rax, %rcx, %rsi
  40961. movq $0xbce6faada7179e84, %rax
  40962. adcxq %rcx, %r11
  40963. adoxq %rsi, %r12
  40964. mulxq %rax, %rcx, %rsi
  40965. movq $0xffffffffffffffff, %rax
  40966. adcxq %rcx, %r12
  40967. adoxq %rsi, %r13
  40968. mulxq %rax, %rcx, %rsi
  40969. movq $0xffffffff00000000, %rax
  40970. adcxq %rcx, %r13
  40971. adoxq %rsi, %r14
  40972. mulxq %rax, %rcx, %rsi
  40973. adcxq %rcx, %r14
  40974. adoxq %rsi, %r15
  40975. adcxq %r8, %r15 # add the carry saved by the previous round
  40976. movq %rbp, %r8
  40977. # carry
  40978. adoxq %rbp, %r8
  40979. adcxq %rbp, %r8
  40980. negq %r8 # final carry -> mask: 0 or all ones
  40981. movq $0xf3b9cac2fc632551, %rax
  40982. movq $0xbce6faada7179e84, %rbx
  40983. andq %r8, %rax
  40984. movq $0xffffffff00000000, %rbp
  40985. andq %r8, %rbx
  40986. andq %r8, %rbp
  40987. subq %rax, %r12 # subtract (modulus & mask) from the reduced value in r12-r15
  40988. sbbq %rbx, %r13
  40989. movq %r12, (%rdi)
  40990. sbbq %r8, %r14 # modulus word 2 is all ones, so the mask itself is the masked word
  40991. movq %r13, 8(%rdi)
  40992. sbbq %rbp, %r15
  40993. movq %r14, 16(%rdi)
  40994. movq %r15, 24(%rdi)
  40995. popq %r15
  40996. popq %r14
  40997. popq %r13
  40998. popq %r12
  40999. popq %rbp
  41000. popq %rbx
  41001. repz retq
  41002. #ifndef __APPLE__
  41003. .size sp_256_mont_mul_order_avx2_4,.-sp_256_mont_mul_order_avx2_4
  41004. #endif /* __APPLE__ */
  41005. /* Square the Montgomery form number mod the order. (r = a * a mod m)
  41006. *
  41007. * r Result of squaring.
  41008. * a Number to square in Montgomery form.
   *
   * Both operands are four 64-bit limbs, least significant first: a is read
   * from (%rsi) and r is written to (%rdi).  The modulus m is not a parameter;
   * its four limbs are loaded as immediates during the reduction below
   * (presumably the P-256 group order, going by "order" in the function name).
   * The code uses mulx/adcx/adox, so the CPU must support BMI2 and ADX.
   * The final step subtracts m only when the reduction carried out of the top
   * limb; the result is not otherwise compared against m.
  41009. */
  41010. #ifndef __APPLE__
  41011. .text
  41012. .globl sp_256_mont_sqr_order_avx2_4
  41013. .type sp_256_mont_sqr_order_avx2_4,@function
  41014. .align 16
  41015. sp_256_mont_sqr_order_avx2_4:
  41016. #else
  41017. .section __TEXT,__text
  41018. .globl _sp_256_mont_sqr_order_avx2_4
  41019. .p2align 4
  41020. _sp_256_mont_sqr_order_avx2_4:
  41021. #endif /* __APPLE__ */
  /* Save the callee-saved registers that are used as scratch below. */
  41022. pushq %rbp
  41023. pushq %r12
  41024. pushq %r13
  41025. pushq %r14
  41026. pushq %r15
  41027. pushq %rbx
  /* Off-diagonal products A[i]*A[j] (i < j) are summed into r9..r14. */
  /* mulx multiplies by %rdx and leaves the flags untouched.          */
  41028. # A[0] * A[1]
  41029. movq (%rsi), %rdx
  41030. movq 16(%rsi), %r15
  41031. mulxq 8(%rsi), %r9, %r10
  41032. # A[0] * A[3]
  41033. mulxq 24(%rsi), %r11, %r12
  41034. # A[2] * A[1]
  41035. movq %r15, %rdx
  41036. mulxq 8(%rsi), %rcx, %rbx
  41037. # A[2] * A[3]
  41038. mulxq 24(%rsi), %r13, %r14
  /* r15 = 0; the xor also clears CF and OF for the adcx/adox chains. */
  41039. xorq %r15, %r15
  41040. adoxq %rcx, %r11
  41041. adoxq %rbx, %r12
  41042. # A[2] * A[0]
  41043. mulxq (%rsi), %rcx, %rbx
  41044. # A[1] * A[3]
  41045. movq 8(%rsi), %rdx
  41046. adoxq %r15, %r13
  41047. mulxq 24(%rsi), %rax, %r8
  41048. adcxq %rcx, %r10
  41049. adoxq %r15, %r14
  41050. adcxq %rbx, %r11
  41051. adcxq %rax, %r12
  41052. adcxq %r8, %r13
  41053. adcxq %r15, %r14
  /* adcx reg,reg doubles the off-diagonal sum through CF while adox adds */
  /* the diagonal squares A[i]*A[i] through OF.  Afterwards r8..r15 hold  */
  /* the 512-bit square, least significant limb in r8.                    */
  41054. # Double with Carry Flag
  41055. xorq %r15, %r15
  41056. # A[0] * A[0]
  41057. movq (%rsi), %rdx
  41058. mulxq %rdx, %r8, %rax
  41059. adcxq %r9, %r9
  41060. adcxq %r10, %r10
  41061. adoxq %rax, %r9
  41062. # A[1] * A[1]
  41063. movq 8(%rsi), %rdx
  41064. mulxq %rdx, %rcx, %rbx
  41065. adcxq %r11, %r11
  41066. adoxq %rcx, %r10
  41067. # A[2] * A[2]
  41068. movq 16(%rsi), %rdx
  41069. mulxq %rdx, %rax, %rcx
  41070. adcxq %r12, %r12
  41071. adoxq %rbx, %r11
  41072. adcxq %r13, %r13
  41073. adoxq %rax, %r12
  41074. adcxq %r14, %r14
  41075. # A[3] * A[3]
  41076. movq 24(%rsi), %rdx
  41077. mulxq %rdx, %rax, %rbx
  41078. adoxq %rcx, %r13
  41079. adcxq %r15, %r15
  41080. adoxq %rax, %r14
  41081. adoxq %rbx, %r15
  /* Four reduction rounds, one per low limb (r8, r9, r10, r11).  Each    */
  /* round computes mu = (low limb * rbx) mod 2^64 and adds mu * m; the   */
  /* low limb register is then overwritten, so rbx is presumably          */
  /* -1/m mod 2^64, which makes that limb cancel - confirm if changed.    */
  /* Low halves of mu*m[i] go through the CF chain (adcx), high halves    */
  /* through the OF chain (adox).                                         */
  41082. # Start Reduction
  41083. movq $0xccd1c8aaee00bc4f, %rbx
  /* Round 1: cancel r8. */
  41084. movq %rbx, %rdx
  41085. imulq %r8, %rdx
  41086. movq $0xf3b9cac2fc632551, %rax
  /* rbp = 0, CF = OF = 0. */
  41087. xorq %rbp, %rbp
  41088. mulxq %rax, %rcx, %rsi
  41089. movq $0xbce6faada7179e84, %rax
  41090. adcxq %rcx, %r8
  41091. adoxq %rsi, %r9
  41092. mulxq %rax, %rcx, %rsi
  41093. movq $0xffffffffffffffff, %rax
  41094. adcxq %rcx, %r9
  41095. adoxq %rsi, %r10
  41096. mulxq %rax, %rcx, %rsi
  41097. movq $0xffffffff00000000, %rax
  41098. adcxq %rcx, %r10
  41099. adoxq %rsi, %r11
  41100. mulxq %rax, %rcx, %rsi
  41101. adcxq %rcx, %r11
  41102. adoxq %rsi, %r12
  41103. adcxq %rbp, %r12
  /* r8 is free now; it collects the two pending carries (OF, then CF) */
  /* and is added into the top limb of the next round.                 */
  41104. movq %rbp, %r8
  41105. # carry
  41106. adoxq %rbp, %r8
  41107. adcxq %rbp, %r8
  /* Round 2: cancel r9. */
  41108. movq %rbx, %rdx
  41109. imulq %r9, %rdx
  41110. movq $0xf3b9cac2fc632551, %rax
  41111. xorq %rbp, %rbp
  41112. mulxq %rax, %rcx, %rsi
  41113. movq $0xbce6faada7179e84, %rax
  41114. adcxq %rcx, %r9
  41115. adoxq %rsi, %r10
  41116. mulxq %rax, %rcx, %rsi
  41117. movq $0xffffffffffffffff, %rax
  41118. adcxq %rcx, %r10
  41119. adoxq %rsi, %r11
  41120. mulxq %rax, %rcx, %rsi
  41121. movq $0xffffffff00000000, %rax
  41122. adcxq %rcx, %r11
  41123. adoxq %rsi, %r12
  41124. mulxq %rax, %rcx, %rsi
  41125. adcxq %rcx, %r12
  41126. adoxq %rsi, %r13
  41127. adcxq %r8, %r13
  41128. movq %rbp, %r8
  41129. # carry
  41130. adoxq %rbp, %r8
  41131. adcxq %rbp, %r8
  /* Round 3: cancel r10. */
  41132. movq %rbx, %rdx
  41133. imulq %r10, %rdx
  41134. movq $0xf3b9cac2fc632551, %rax
  41135. xorq %rbp, %rbp
  41136. mulxq %rax, %rcx, %rsi
  41137. movq $0xbce6faada7179e84, %rax
  41138. adcxq %rcx, %r10
  41139. adoxq %rsi, %r11
  41140. mulxq %rax, %rcx, %rsi
  41141. movq $0xffffffffffffffff, %rax
  41142. adcxq %rcx, %r11
  41143. adoxq %rsi, %r12
  41144. mulxq %rax, %rcx, %rsi
  41145. movq $0xffffffff00000000, %rax
  41146. adcxq %rcx, %r12
  41147. adoxq %rsi, %r13
  41148. mulxq %rax, %rcx, %rsi
  41149. adcxq %rcx, %r13
  41150. adoxq %rsi, %r14
  41151. adcxq %r8, %r14
  41152. movq %rbp, %r8
  41153. # carry
  41154. adoxq %rbp, %r8
  41155. adcxq %rbp, %r8
  /* Round 4: cancel r11. */
  41156. movq %rbx, %rdx
  41157. imulq %r11, %rdx
  41158. movq $0xf3b9cac2fc632551, %rax
  41159. xorq %rbp, %rbp
  41160. mulxq %rax, %rcx, %rsi
  41161. movq $0xbce6faada7179e84, %rax
  41162. adcxq %rcx, %r11
  41163. adoxq %rsi, %r12
  41164. mulxq %rax, %rcx, %rsi
  41165. movq $0xffffffffffffffff, %rax
  41166. adcxq %rcx, %r12
  41167. adoxq %rsi, %r13
  41168. mulxq %rax, %rcx, %rsi
  41169. movq $0xffffffff00000000, %rax
  41170. adcxq %rcx, %r13
  41171. adoxq %rsi, %r14
  41172. mulxq %rax, %rcx, %rsi
  41173. adcxq %rcx, %r14
  41174. adoxq %rsi, %r15
  41175. adcxq %r8, %r15
  41176. movq %rbp, %r8
  41177. # carry
  41178. adoxq %rbp, %r8
  41179. adcxq %rbp, %r8
  /* Result is r12..r15 plus the carry in r8.  negq turns a carry of 1   */
  /* into an all-ones mask; the modulus limbs are ANDed with it so that  */
  /* m is subtracted only when the carry is set.  The third limb of m is */
  /* all ones, so the mask in r8 itself serves as that limb.             */
  41180. negq %r8
  41181. movq $0xf3b9cac2fc632551, %rax
  41182. movq $0xbce6faada7179e84, %rbx
  41183. andq %r8, %rax
  41184. movq $0xffffffff00000000, %rbp
  41185. andq %r8, %rbx
  41186. andq %r8, %rbp
  /* The stores are interleaved with the borrow chain; movq leaves the flags alone. */
  41187. subq %rax, %r12
  41188. sbbq %rbx, %r13
  41189. movq %r12, (%rdi)
  41190. sbbq %r8, %r14
  41191. movq %r13, 8(%rdi)
  41192. sbbq %rbp, %r15
  41193. movq %r14, 16(%rdi)
  41194. movq %r15, 24(%rdi)
  /* Restore callee-saved registers in reverse push order. */
  41195. popq %rbx
  41196. popq %r15
  41197. popq %r14
  41198. popq %r13
  41199. popq %r12
  41200. popq %rbp
  41201. repz retq
  41202. #ifndef __APPLE__
  41203. .size sp_256_mont_sqr_order_avx2_4,.-sp_256_mont_sqr_order_avx2_4
  41204. #endif /* __APPLE__ */
  41205. /* Non-constant time modular inversion.
  41206. *
  41207. * @param [out] r Resulting number.
  41208. * @param [in] a Number to invert.
  41209. * @param [in] m Modulus.
  41210. * @return MP_OKAY on success.
   *
   * All numbers are four 64-bit limbs, least significant first
   * (r = %rdi, a = %rsi, m = %rdx).
   *
   * The routine works in two passes.  Pass 1 runs a binary-GCD style loop on
   * u (initially m) and v (initially a) and records one byte per step in a
   * 0x201-byte stack buffer.  Pass 2 replays those bytes on b (initially m)
   * and d (initially 1), reducing modulo m as it goes.  Step codes:
   *   0 = halve b            1 = halve d
   *   2 = b -= d, halve b    3 = d -= b, halve d
   *   7 = end of list
   * Branching depends on the operand values, hence "non-constant time".
   * The loops assume they terminate with u or v equal to 1; there is no
   * explicit check for a = 0 or for a non-invertible input.
   *
   * NOTE(review): no status code is loaded into %rax before returning; only
   * %al is written (1 when u reached 1, 0 when v reached 1).  Confirm that
   * callers ignore the return value.
  41211. */
  41212. #ifndef __APPLE__
  41213. .text
  41214. .globl sp_256_mod_inv_4
  41215. .type sp_256_mod_inv_4,@function
  41216. .align 16
  41217. sp_256_mod_inv_4:
  41218. #else
  41219. .section __TEXT,__text
  41220. .globl _sp_256_mod_inv_4
  41221. .p2align 4
  41222. _sp_256_mod_inv_4:
  41223. #endif /* __APPLE__ */
  41224. pushq %r12
  41225. pushq %r13
  41226. pushq %r14
  41227. pushq %r15
  /* Step buffer: 0x201 = 513 bytes, indexed by r15 from (%rsp). */
  41228. subq $0x201, %rsp
  /* u = m in rcx, r8, r9, r10 (low to high). */
  41229. movq (%rdx), %rcx
  41230. movq 8(%rdx), %r8
  41231. movq 16(%rdx), %r9
  41232. movq 24(%rdx), %r10
  /* v = a in r11, r12, r13, r14 (low to high). */
  41233. movq (%rsi), %r11
  41234. movq 8(%rsi), %r12
  41235. movq 16(%rsi), %r13
  41236. movq 24(%rsi), %r14
  /* r15 = number of steps recorded so far. */
  41237. movq $0x00, %r15
  /* While v is even: v >>= 1 and record step 1 (halve d). */
  41238. testb $0x01, %r11b
  41239. jnz L_256_mod_inv_4_v_even_end
  41240. L_256_mod_inv_4_v_even_start:
  41241. shrdq $0x01, %r12, %r11
  41242. shrdq $0x01, %r13, %r12
  41243. shrdq $0x01, %r14, %r13
  41244. shrq $0x01, %r14
  41245. movb $0x01, (%rsp,%r15,1)
  41246. incq %r15
  41247. testb $0x01, %r11b
  41248. jz L_256_mod_inv_4_v_even_start
  41249. L_256_mod_inv_4_v_even_end:
  /* Main loop: unsigned compare of u and v from the top limb down. */
  /* u < v goes to uv_v; u >= v falls into uv_u.                    */
  41250. L_256_mod_inv_4_uv_start:
  41251. cmpq %r14, %r10
  41252. jb L_256_mod_inv_4_uv_v
  41253. ja L_256_mod_inv_4_uv_u
  41254. cmpq %r13, %r9
  41255. jb L_256_mod_inv_4_uv_v
  41256. ja L_256_mod_inv_4_uv_u
  41257. cmpq %r12, %r8
  41258. jb L_256_mod_inv_4_uv_v
  41259. ja L_256_mod_inv_4_uv_u
  41260. cmpq %r11, %rcx
  41261. jb L_256_mod_inv_4_uv_v
  /* u >= v: record step 2, then u = (u - v) >> 1.  The unconditional shift */
  /* assumes u and v are both odd here, so the difference is even.          */
  41262. L_256_mod_inv_4_uv_u:
  41263. movb $2, (%rsp,%r15,1)
  41264. incq %r15
  41265. subq %r11, %rcx
  41266. sbbq %r12, %r8
  41267. sbbq %r13, %r9
  41268. sbbq %r14, %r10
  41269. shrdq $0x01, %r8, %rcx
  41270. shrdq $0x01, %r9, %r8
  41271. shrdq $0x01, %r10, %r9
  41272. shrq $0x01, %r10
  /* While u is even: u >>= 1 and record step 0 (halve b). */
  41273. testb $0x01, %cl
  41274. jnz L_256_mod_inv_4_usubv_even_end
  41275. L_256_mod_inv_4_usubv_even_start:
  41276. shrdq $0x01, %r8, %rcx
  41277. shrdq $0x01, %r9, %r8
  41278. shrdq $0x01, %r10, %r9
  41279. shrq $0x01, %r10
  41280. movb $0x00, (%rsp,%r15,1)
  41281. incq %r15
  41282. testb $0x01, %cl
  41283. jz L_256_mod_inv_4_usubv_even_start
  41284. L_256_mod_inv_4_usubv_even_end:
  /* Loop again unless u == 1 (low limb 1, upper three limbs zero). */
  41285. cmpq $0x01, %rcx
  41286. jne L_256_mod_inv_4_uv_start
  41287. movq %r8, %rsi
  41288. orq %r9, %rsi
  41289. jne L_256_mod_inv_4_uv_start
  41290. orq %r10, %rsi
  41291. jne L_256_mod_inv_4_uv_start
  /* u reached 1: al = 1 selects b as the result. */
  41292. movb $0x01, %al
  41293. jmp L_256_mod_inv_4_uv_end
  /* u < v: record step 3, then v = (v - u) >> 1 (mirror of uv_u). */
  41294. L_256_mod_inv_4_uv_v:
  41295. movb $3, (%rsp,%r15,1)
  41296. incq %r15
  41297. subq %rcx, %r11
  41298. sbbq %r8, %r12
  41299. sbbq %r9, %r13
  41300. sbbq %r10, %r14
  41301. shrdq $0x01, %r12, %r11
  41302. shrdq $0x01, %r13, %r12
  41303. shrdq $0x01, %r14, %r13
  41304. shrq $0x01, %r14
  /* While v is even: v >>= 1 and record step 1 (halve d). */
  41305. testb $0x01, %r11b
  41306. jnz L_256_mod_inv_4_vsubu_even_end
  41307. L_256_mod_inv_4_vsubu_even_start:
  41308. shrdq $0x01, %r12, %r11
  41309. shrdq $0x01, %r13, %r12
  41310. shrdq $0x01, %r14, %r13
  41311. shrq $0x01, %r14
  41312. movb $0x01, (%rsp,%r15,1)
  41313. incq %r15
  41314. testb $0x01, %r11b
  41315. jz L_256_mod_inv_4_vsubu_even_start
  41316. L_256_mod_inv_4_vsubu_even_end:
  /* Loop again unless v == 1. */
  41317. cmpq $0x01, %r11
  41318. jne L_256_mod_inv_4_uv_start
  41319. movq %r12, %rsi
  41320. orq %r13, %rsi
  41321. jne L_256_mod_inv_4_uv_start
  41322. orq %r14, %rsi
  41323. jne L_256_mod_inv_4_uv_start
  /* v reached 1: al = 0 selects d as the result. */
  41324. movb $0x00, %al
  /* Pass 2: replay the recorded steps on b and d. */
  41325. L_256_mod_inv_4_uv_end:
  /* b = m in rcx, r8, r9, r10. */
  41326. movq (%rdx), %rcx
  41327. movq 8(%rdx), %r8
  41328. movq 16(%rdx), %r9
  41329. movq 24(%rdx), %r10
  /* d = 1 in r11, r12, r13, r14. */
  41330. movq $0x01, %r11
  41331. xorq %r12, %r12
  41332. xorq %r13, %r13
  41333. xorq %r14, %r14
  /* Terminate the list with 7, fetch the first step, r15 = index of the next. */
  41334. movb $7, (%rsp,%r15,1)
  41335. movb (%rsp), %sil
  41336. movq $0x01, %r15
  /* Dispatch on the step code in sil: 1, 0, 3, 2; anything else (7) ends. */
  41337. cmpb $0x01, %sil
  41338. je L_256_mod_inv_4_op_div2_d
  41339. jl L_256_mod_inv_4_op_div2_b
  41340. cmpb $3, %sil
  41341. je L_256_mod_inv_4_op_d_sub_b
  41342. jl L_256_mod_inv_4_op_b_sub_d
  41343. jmp L_256_mod_inv_4_op_end
  /* Step 2: b -= d; on borrow add m back; then fall into halving b. */
  41344. L_256_mod_inv_4_op_b_sub_d:
  41345. subq %r11, %rcx
  41346. sbbq %r12, %r8
  41347. sbbq %r13, %r9
  41348. sbbq %r14, %r10
  41349. jnc L_256_mod_inv_4_op_div2_b
  41350. addq (%rdx), %rcx
  41351. adcq 8(%rdx), %r8
  41352. adcq 16(%rdx), %r9
  41353. adcq 24(%rdx), %r10
  /* Step 0: halve b.  If b is odd, add m first; rsi catches the carry  */
  /* out of the top limb and is shifted back in as bit 255.  The movq   */
  /* between testb and jz does not modify the flags.                    */
  41354. L_256_mod_inv_4_op_div2_b:
  41355. testb $0x01, %cl
  41356. movq $0x00, %rsi
  41357. jz L_256_mod_inv_4_op_div2_b_mod
  41358. addq (%rdx), %rcx
  41359. adcq 8(%rdx), %r8
  41360. adcq 16(%rdx), %r9
  41361. adcq 24(%rdx), %r10
  41362. adcq $0x00, %rsi
  41363. L_256_mod_inv_4_op_div2_b_mod:
  41364. shrdq $0x01, %r8, %rcx
  41365. shrdq $0x01, %r9, %r8
  41366. shrdq $0x01, %r10, %r9
  41367. shrdq $0x01, %rsi, %r10
  /* Fetch and dispatch the next step. */
  41368. movb (%rsp,%r15,1), %sil
  41369. incq %r15
  41370. cmpb $0x01, %sil
  41371. je L_256_mod_inv_4_op_div2_d
  41372. jl L_256_mod_inv_4_op_div2_b
  41373. cmpb $3, %sil
  41374. je L_256_mod_inv_4_op_d_sub_b
  41375. jl L_256_mod_inv_4_op_b_sub_d
  41376. jmp L_256_mod_inv_4_op_end
  /* Step 3: d -= b; on borrow add m back; then fall into halving d. */
  41377. L_256_mod_inv_4_op_d_sub_b:
  41378. subq %rcx, %r11
  41379. sbbq %r8, %r12
  41380. sbbq %r9, %r13
  41381. sbbq %r10, %r14
  41382. jnc L_256_mod_inv_4_op_div2_d
  41383. addq (%rdx), %r11
  41384. adcq 8(%rdx), %r12
  41385. adcq 16(%rdx), %r13
  41386. adcq 24(%rdx), %r14
  /* Step 1: halve d, adding m first when d is odd (same scheme as for b). */
  41387. L_256_mod_inv_4_op_div2_d:
  41388. testb $0x01, %r11b
  41389. movq $0x00, %rsi
  41390. jz L_256_mod_inv_4_op_div2_d_mod
  41391. addq (%rdx), %r11
  41392. adcq 8(%rdx), %r12
  41393. adcq 16(%rdx), %r13
  41394. adcq 24(%rdx), %r14
  41395. adcq $0x00, %rsi
  41396. L_256_mod_inv_4_op_div2_d_mod:
  41397. shrdq $0x01, %r12, %r11
  41398. shrdq $0x01, %r13, %r12
  41399. shrdq $0x01, %r14, %r13
  41400. shrdq $0x01, %rsi, %r14
  /* Fetch and dispatch the next step; code 7 falls through to op_end. */
  41401. movb (%rsp,%r15,1), %sil
  41402. incq %r15
  41403. cmpb $0x01, %sil
  41404. je L_256_mod_inv_4_op_div2_d
  41405. jl L_256_mod_inv_4_op_div2_b
  41406. cmpb $3, %sil
  41407. je L_256_mod_inv_4_op_d_sub_b
  41408. jl L_256_mod_inv_4_op_b_sub_d
  /* Store b when pass 1 ended with u == 1 (al = 1), otherwise store d. */
  41409. L_256_mod_inv_4_op_end:
  41410. cmpb $0x01, %al
  41411. jne L_256_mod_inv_4_store_d
  41412. movq %rcx, (%rdi)
  41413. movq %r8, 8(%rdi)
  41414. movq %r9, 16(%rdi)
  41415. movq %r10, 24(%rdi)
  41416. jmp L_256_mod_inv_4_store_end
  41417. L_256_mod_inv_4_store_d:
  41418. movq %r11, (%rdi)
  41419. movq %r12, 8(%rdi)
  41420. movq %r13, 16(%rdi)
  41421. movq %r14, 24(%rdi)
  41422. L_256_mod_inv_4_store_end:
  /* Release the step buffer and restore callee-saved registers. */
  41423. addq $0x201, %rsp
  41424. popq %r15
  41425. popq %r14
  41426. popq %r13
  41427. popq %r12
  41428. repz retq
  41429. #ifndef __APPLE__
  41430. .size sp_256_mod_inv_4,.-sp_256_mod_inv_4
  41431. #endif /* __APPLE__ */
  /* Constant tables for sp_256_mod_inv_avx2_4.  That routine keeps each    */
  /* 256-bit value as ten signed limbs of nominally 26 bits, split over two */
  /* YMM registers: one holds limbs 0,2,4,6,8 in dwords 0..4, the other     */
  /* holds limbs 1,3,5,7,9 in dwords 0..4.  Dwords 5..7 are unused.         */
  41432. #ifndef __APPLE__
  41433. .data
  41434. #else
  41435. .section __DATA,__data
  41436. #endif /* __APPLE__ */
  /* The modulus in that layout: the first 32 bytes are the even limbs, the */
  /* next 32 bytes the odd limbs (loaded from offsets 0 and 32).            */
  41437. L_sp256_mod_inv_avx2_4_order:
  41438. .long 0x632551,0x1e84f3b,0x3bce6fa,0x3ffffff
  41439. .long 0x3ff0000,0x0,0x0,0x0
  41440. .long 0x272b0bf,0x2b69c5e,0x3ffffff,0x3ff
  41441. .long 0x3fffff,0x0,0x0,0x0
  41442. #ifndef __APPLE__
  41443. .data
  41444. #else
  41445. .section __DATA,__data
  41446. #endif /* __APPLE__ */
  41447. #ifndef __APPLE__
  41448. .align 32
  41449. #else
  41450. .p2align 5
  41451. #endif /* __APPLE__ */
  /* The value 1 (bit 0 of limb 0).  Used as the initial even-limb half of */
  /* d and as the vptest mask that checks whether a value is odd.          */
  41452. L_sp256_mod_inv_avx2_4_one:
  41453. .quad 0x1, 0x0
  41454. .quad 0x0, 0x0
  41455. #ifndef __APPLE__
  41456. .data
  41457. #else
  41458. .section __DATA,__data
  41459. #endif /* __APPLE__ */
  /* Bit 0 of every dword: extracts the low bit of each odd limb when halving. */
  41460. L_sp256_mod_inv_avx2_4_all_one:
  41461. .long 0x1,0x1,0x1,0x1
  41462. .long 0x1,0x1,0x1,0x1
  41463. #ifndef __APPLE__
  41464. .data
  41465. #else
  41466. .section __DATA,__data
  41467. #endif /* __APPLE__ */
  /* Bit 0 of dwords 1..4: extracts the low bit of even limbs 2,4,6,8 when halving. */
  41468. L_sp256_mod_inv_avx2_4_mask01111:
  41469. .long 0x0,0x1,0x1,0x1
  41470. .long 0x1,0x0,0x0,0x0
  41471. #ifndef __APPLE__
  41472. .data
  41473. #else
  41474. .section __DATA,__data
  41475. #endif /* __APPLE__ */
  /* vpermd index vector: destination dword i takes source dword i+1 */
  /* (dword 7 takes itself).                                         */
  41476. L_sp256_mod_inv_avx2_4_down_one_dword:
  41477. .long 0x1,0x2,0x3,0x4
  41478. .long 0x5,0x6,0x7,0x7
  41479. #ifndef __APPLE__
  41480. .data
  41481. #else
  41482. .section __DATA,__data
  41483. #endif /* __APPLE__ */
  /* Sign bit of dword 4: vptest mask applied to the odd-limb register to */
  /* detect a negative top limb (limb 9).                                 */
  41484. L_sp256_mod_inv_avx2_4_neg:
  41485. .long 0x0,0x0,0x0,0x0
  41486. .long 0x80000000,0x0,0x0,0x0
  41487. #ifndef __APPLE__
  41488. .data
  41489. #else
  41490. .section __DATA,__data
  41491. #endif /* __APPLE__ */
  /* vpermd index vector: destination dwords 1..4 take source dwords 0..3; */
  /* destination dwords 0 and 5..7 take source dword 7.                     */
  41492. L_sp256_mod_inv_avx2_4_up_one_dword:
  41493. .long 0x7,0x0,0x1,0x2
  41494. .long 0x3,0x7,0x7,0x7
  41495. #ifndef __APPLE__
  41496. .data
  41497. #else
  41498. .section __DATA,__data
  41499. #endif /* __APPLE__ */
  /* Low 26 bits of dwords 0..4: keeps the in-range part of each limb */
  /* during the final carry pass.                                     */
  41500. L_sp256_mod_inv_avx2_4_mask26:
  41501. .long 0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff
  41502. .long 0x3ffffff,0x0,0x0,0x0
  /* Non-constant time modular inversion (AVX2 variant).
   *
   * @param [out] r Resulting number (four 64-bit limbs written to (%rdi)).
   * @param [in] a Number to invert (four 64-bit limbs read from (%rsi)).
   * @param [in] m Modulus (four 64-bit limbs read from (%rdx)).
   * @return MP_OKAY on success.
   *
   * Binary-GCD style inversion.  u (initially m) and v (initially a) live in
   * general purpose registers; the cofactors b (initially 0) and d (initially 1)
   * live in YMM register pairs as ten signed 26-bit limbs (even limbs in one
   * register, odd limbs in the other, dwords 0..4), so that subtraction and
   * halving need no carry propagation.
   *
   * The modular corrections applied to b and d always use the constant table
   * L_sp256_mod_inv_avx2_4_order, not the m argument; m only seeds u.  The
   * caller is therefore expected to pass the same modulus as that table.
   *
   * Register roles:
   *   rax,rcx,r8,r9    u (low to high)      r10,r11,r12,r13  v (low to high)
   *   ymm0/ymm1        b even/odd limbs     ymm2/ymm3        d even/odd limbs
   *   ymm4/ymm5        scratch              ymm6/ymm7        modulus even/odd
   *   ymm8  one        ymm9  mask01111      ymm10 all_one
   *   ymm11 down_one   ymm12 neg            ymm13 up_one     ymm14 mask26
   *
   * The upper YMM halves are cleared with vzeroupper before returning so that
   * callers running legacy SSE code do not pay AVX/SSE transition penalties.
   *
   * NOTE(review): %rax holds result limb 0 on return, not a status code;
   * confirm that callers ignore the return value.
   * NOTE(review): the final combination uses zero-extended vpextrd results,
   * i.e. it assumes every limb is non-negative after the single carry pass.
   */
  #ifndef __APPLE__
  .text
  .globl sp_256_mod_inv_avx2_4
  .type sp_256_mod_inv_avx2_4,@function
  .align 16
  sp_256_mod_inv_avx2_4:
  #else
  .section __TEXT,__text
  .globl _sp_256_mod_inv_avx2_4
  .p2align 4
  _sp_256_mod_inv_avx2_4:
  #endif /* __APPLE__ */
      pushq %r12
      pushq %r13
      pushq %r14
      pushq %r15
      pushq %rbx
      /* u = m */
      movq (%rdx), %rax
      movq 8(%rdx), %rcx
      movq 16(%rdx), %r8
      movq 24(%rdx), %r9
      /* v = a */
      movq (%rsi), %r10
      movq 8(%rsi), %r11
      movq 16(%rsi), %r12
      movq 24(%rsi), %r13
      /* Load the constant tables. */
      vmovdqu 0+L_sp256_mod_inv_avx2_4_order(%rip), %ymm6
      vmovdqu 32+L_sp256_mod_inv_avx2_4_order(%rip), %ymm7
      vmovdqu 0+L_sp256_mod_inv_avx2_4_one(%rip), %ymm8
      vmovdqu 0+L_sp256_mod_inv_avx2_4_mask01111(%rip), %ymm9
      vmovdqu 0+L_sp256_mod_inv_avx2_4_all_one(%rip), %ymm10
      vmovdqu L_sp256_mod_inv_avx2_4_down_one_dword(%rip), %ymm11
      vmovdqu L_sp256_mod_inv_avx2_4_neg(%rip), %ymm12
      vmovdqu L_sp256_mod_inv_avx2_4_up_one_dword(%rip), %ymm13
      vmovdqu L_sp256_mod_inv_avx2_4_mask26(%rip), %ymm14
      /* b = 0 (VEX xmm writes clear the upper lanes), d = 1. */
      vpxor %xmm0, %xmm0, %xmm0
      vpxor %xmm1, %xmm1, %xmm1
      vmovdqu %ymm8, %ymm2
      vpxor %xmm3, %xmm3, %xmm3
      /* While v is even: v >>= 1 and d = d / 2 mod order. */
      testb $0x01, %r10b
      jnz L_256_mod_inv_avx2_4_v_even_end
  L_256_mod_inv_avx2_4_v_even_start:
      shrdq $0x01, %r11, %r10
      shrdq $0x01, %r12, %r11
      shrdq $0x01, %r13, %r12
      shrq $0x01, %r13
      /* If d is odd, add the modulus first so that it becomes even. */
      vptest %ymm8, %ymm2
      jz L_256_mod_inv_avx2_4_v_even_shr1
      vpaddd %ymm6, %ymm2, %ymm2
      vpaddd %ymm7, %ymm3, %ymm3
  L_256_mod_inv_avx2_4_v_even_shr1:
      /* Halve d: the low bit of each limb moves to bit 25 of the limb below. */
      vpand %ymm9, %ymm2, %ymm4
      vpand %ymm10, %ymm3, %ymm5
      vpermd %ymm4, %ymm11, %ymm4
      vpsrad $0x01, %ymm2, %ymm2
      vpsrad $0x01, %ymm3, %ymm3
      vpslld $25, %ymm5, %ymm5
      vpslld $25, %xmm4, %xmm4
      vpaddd %ymm5, %ymm2, %ymm2
      vpaddd %ymm4, %ymm3, %ymm3
      testb $0x01, %r10b
      jz L_256_mod_inv_avx2_4_v_even_start
  L_256_mod_inv_avx2_4_v_even_end:
      /* Main loop: unsigned compare of u and v from the top limb down. */
  L_256_mod_inv_avx2_4_uv_start:
      cmpq %r13, %r9
      jb L_256_mod_inv_avx2_4_uv_v
      ja L_256_mod_inv_avx2_4_uv_u
      cmpq %r12, %r8
      jb L_256_mod_inv_avx2_4_uv_v
      ja L_256_mod_inv_avx2_4_uv_u
      cmpq %r11, %rcx
      jb L_256_mod_inv_avx2_4_uv_v
      ja L_256_mod_inv_avx2_4_uv_u
      cmpq %r10, %rax
      jb L_256_mod_inv_avx2_4_uv_v
      /* u >= v: u -= v and b -= d.  The vector ops leave the flags alone, */
      /* so they can sit inside the sbb chain.                             */
  L_256_mod_inv_avx2_4_uv_u:
      subq %r10, %rax
      sbbq %r11, %rcx
      vpsubd %ymm2, %ymm0, %ymm0
      sbbq %r12, %r8
      vpsubd %ymm3, %ymm1, %ymm1
      sbbq %r13, %r9
      /* If the top limb of b went negative, add the modulus. */
      vptest %ymm12, %ymm1
      jz L_256_mod_inv_avx2_4_usubv_done_neg
      vpaddd %ymm6, %ymm0, %ymm0
      vpaddd %ymm7, %ymm1, %ymm1
  L_256_mod_inv_avx2_4_usubv_done_neg:
      /* Repeat until u is odd: u >>= 1 and b = b / 2 mod order. */
  L_256_mod_inv_avx2_4_usubv_shr1:
      shrdq $0x01, %rcx, %rax
      shrdq $0x01, %r8, %rcx
      shrdq $0x01, %r9, %r8
      shrq $0x01, %r9
      vptest %ymm8, %ymm0
      jz L_256_mod_inv_avx2_4_usubv_sub_shr1
      vpaddd %ymm6, %ymm0, %ymm0
      vpaddd %ymm7, %ymm1, %ymm1
  L_256_mod_inv_avx2_4_usubv_sub_shr1:
      vpand %ymm9, %ymm0, %ymm4
      vpand %ymm10, %ymm1, %ymm5
      vpermd %ymm4, %ymm11, %ymm4
      vpsrad $0x01, %ymm0, %ymm0
      vpsrad $0x01, %ymm1, %ymm1
      vpslld $25, %ymm5, %ymm5
      vpslld $25, %xmm4, %xmm4
      vpaddd %ymm5, %ymm0, %ymm0
      vpaddd %ymm4, %ymm1, %ymm1
      testb $0x01, %al
      jz L_256_mod_inv_avx2_4_usubv_shr1
      /* Loop again unless u == 1. */
      cmpq $0x01, %rax
      jne L_256_mod_inv_avx2_4_uv_start
      movq %rcx, %rsi
      orq %r8, %rsi
      jne L_256_mod_inv_avx2_4_uv_start
      orq %r9, %rsi
      jne L_256_mod_inv_avx2_4_uv_start
      /* u == 1: the result is b.  One carry pass: move bits 26 and up of */
      /* each limb into the next limb, keep the low 26 bits.              */
      vpsrad $26, %ymm1, %ymm5
      vpsrad $26, %ymm0, %ymm4
      vpermd %ymm5, %ymm13, %ymm5
      vpand %ymm14, %ymm0, %ymm0
      vpand %ymm14, %ymm1, %ymm1
      vpaddd %ymm5, %ymm0, %ymm0
      vpaddd %ymm4, %ymm1, %ymm1
      /* Even limbs 0,2,4,6,8 -> eax, r8d, r10d, r12d, r14d; */
      /* odd limbs 1,3,5,7,9 -> ecx, r9d, r11d, r13d, r15d.  */
      vpextrd $0x00, %xmm0, %eax
      vpextrd $0x01, %xmm0, %r8d
      vpextrd $2, %xmm0, %r10d
      vpextrd $3, %xmm0, %r12d
      vextracti128 $0x01, %ymm0, %xmm0
      vpextrd $0x00, %xmm1, %ecx
      vpextrd $0x01, %xmm1, %r9d
      vpextrd $2, %xmm1, %r11d
      vpextrd $3, %xmm1, %r13d
      vextracti128 $0x01, %ymm1, %xmm1
      vpextrd $0x00, %xmm0, %r14d
      vpextrd $0x00, %xmm1, %r15d
      jmp L_256_mod_inv_avx2_4_store_done
      /* u < v: v -= u and d -= b (mirror of the uv_u path). */
  L_256_mod_inv_avx2_4_uv_v:
      subq %rax, %r10
      sbbq %rcx, %r11
      vpsubd %ymm0, %ymm2, %ymm2
      sbbq %r8, %r12
      vpsubd %ymm1, %ymm3, %ymm3
      sbbq %r9, %r13
      vptest %ymm12, %ymm3
      jz L_256_mod_inv_avx2_4_vsubu_done_neg
      vpaddd %ymm6, %ymm2, %ymm2
      vpaddd %ymm7, %ymm3, %ymm3
  L_256_mod_inv_avx2_4_vsubu_done_neg:
      /* Repeat until v is odd: v >>= 1 and d = d / 2 mod order. */
  L_256_mod_inv_avx2_4_vsubu_shr1:
      shrdq $0x01, %r11, %r10
      shrdq $0x01, %r12, %r11
      shrdq $0x01, %r13, %r12
      shrq $0x01, %r13
      vptest %ymm8, %ymm2
      jz L_256_mod_inv_avx2_4_vsubu_sub_shr1
      vpaddd %ymm6, %ymm2, %ymm2
      vpaddd %ymm7, %ymm3, %ymm3
  L_256_mod_inv_avx2_4_vsubu_sub_shr1:
      vpand %ymm9, %ymm2, %ymm4
      vpand %ymm10, %ymm3, %ymm5
      vpermd %ymm4, %ymm11, %ymm4
      vpsrad $0x01, %ymm2, %ymm2
      vpsrad $0x01, %ymm3, %ymm3
      vpslld $25, %ymm5, %ymm5
      vpslld $25, %xmm4, %xmm4
      vpaddd %ymm5, %ymm2, %ymm2
      vpaddd %ymm4, %ymm3, %ymm3
      testb $0x01, %r10b
      jz L_256_mod_inv_avx2_4_vsubu_shr1
      /* Loop again unless v == 1. */
      cmpq $0x01, %r10
      jne L_256_mod_inv_avx2_4_uv_start
      movq %r11, %rsi
      orq %r12, %rsi
      jne L_256_mod_inv_avx2_4_uv_start
      orq %r13, %rsi
      jne L_256_mod_inv_avx2_4_uv_start
      /* v == 1: the result is d.  Same carry pass and extraction as for b. */
      vpsrad $26, %ymm3, %ymm5
      vpsrad $26, %ymm2, %ymm4
      vpermd %ymm5, %ymm13, %ymm5
      vpand %ymm14, %ymm2, %ymm2
      vpand %ymm14, %ymm3, %ymm3
      vpaddd %ymm5, %ymm2, %ymm2
      vpaddd %ymm4, %ymm3, %ymm3
      vpextrd $0x00, %xmm2, %eax
      vpextrd $0x01, %xmm2, %r8d
      vpextrd $2, %xmm2, %r10d
      vpextrd $3, %xmm2, %r12d
      vextracti128 $0x01, %ymm2, %xmm2
      vpextrd $0x00, %xmm3, %ecx
      vpextrd $0x01, %xmm3, %r9d
      vpextrd $2, %xmm3, %r11d
      vpextrd $3, %xmm3, %r13d
      vextracti128 $0x01, %ymm3, %xmm3
      vpextrd $0x00, %xmm2, %r14d
      vpextrd $0x00, %xmm3, %r15d
  L_256_mod_inv_avx2_4_store_done:
      /* All vector work is finished: clear the upper YMM halves.  This */
      /* touches neither the general purpose registers nor the flags.   */
      vzeroupper
      /* Pair up the limbs: each register becomes even + (odd << 26), 52 bits. */
      shlq $26, %rcx
      addq %rcx, %rax
      shlq $26, %r9
      addq %r9, %r8
      shlq $26, %r11
      addq %r11, %r10
      shlq $26, %r13
      addq %r13, %r12
      shlq $26, %r15
      addq %r15, %r14
      /* Pack the five 52-bit pieces (bit offsets 0, 52, 104, 156, 208) into */
      /* four 64-bit words: the low part of each piece is shifted up into    */
      /* the word below and the remainder is kept in place.                  */
      movq %r8, %rcx
      movq %r10, %r9
      movq %r12, %r11
      shlq $52, %rcx
      sarq $12, %r8
      shlq $40, %r9
      sarq $24, %r10
      shlq $28, %r11
      sarq $36, %r12
      shlq $16, %r14
      addq %rcx, %rax
      adcq %r9, %r8
      adcq %r11, %r10
      adcq %r14, %r12
      movq %rax, (%rdi)
      movq %r8, 8(%rdi)
      movq %r10, 16(%rdi)
      movq %r12, 24(%rdi)
      popq %rbx
      popq %r15
      popq %r14
      popq %r13
      popq %r12
      repz retq
  #ifndef __APPLE__
  .size sp_256_mod_inv_avx2_4,.-sp_256_mod_inv_avx2_4
  #endif /* __APPLE__ */
  41741. #endif /* !WOLFSSL_SP_NO_256 */
  41742. #ifdef WOLFSSL_SP_384
  /* Constant-time conditional copy of a 6-limb (384-bit) number.
   *
   * Computes r ^= (r ^ a) & m for each of the six 64-bit limbs, so r becomes
   * a when m is all ones (-1) and is left unchanged when m is 0.
   *
   * r (%rdi) Number that is conditionally overwritten.
   * a (%rsi) Number to copy from.
   * m (%rdx) Mask: -1 to copy, 0 to leave r alone.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_cond_copy_6
  .type sp_384_cond_copy_6,@function
  .align 16
  sp_384_cond_copy_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_cond_copy_6
  .p2align 4
  _sp_384_cond_copy_6:
  #endif /* __APPLE__ */
      /* Build the masked difference (a ^ r) & m for every limb.  All loads */
      /* happen before any store, so overlapping buffers behave as before.  */
      movq (%rsi), %rax
      xorq (%rdi), %rax
      andq %rdx, %rax
      movq 8(%rsi), %rcx
      xorq 8(%rdi), %rcx
      andq %rdx, %rcx
      movq 16(%rsi), %r8
      xorq 16(%rdi), %r8
      andq %rdx, %r8
      movq 24(%rsi), %r9
      xorq 24(%rdi), %r9
      andq %rdx, %r9
      movq 32(%rsi), %r10
      xorq 32(%rdi), %r10
      andq %rdx, %r10
      movq 40(%rsi), %r11
      xorq 40(%rdi), %r11
      andq %rdx, %r11
      /* Fold the differences back into r. */
      xorq %rax, (%rdi)
      xorq %rcx, 8(%rdi)
      xorq %r8, 16(%rdi)
      xorq %r9, 24(%rdi)
      xorq %r10, 32(%rdi)
      xorq %r11, 40(%rdi)
      repz retq
  #ifndef __APPLE__
  .size sp_384_cond_copy_6,.-sp_384_cond_copy_6
  #endif /* __APPLE__ */
  41790. /* Multiply a and b into r. (r = a * b)
  41791. *
  41792. * r A single precision integer.
  41793. * a A single precision integer.
  41794. * b A single precision integer.
   *
   * a (%rsi) and b (%rdx) are six 64-bit limbs each; r (%rdi) receives the
   * full twelve-limb product.  This is a column-by-column schoolbook multiply:
   * for each result limb k, all products a[i]*b[j] with i + j = k are added
   * into a rotating three-register accumulator (r8, r9, r10).
   * The low six result limbs are staged in a 48-byte stack area and copied to
   * r only at the end (presumably so that r may overlap a or b - confirm);
   * limbs 6..11 are written to r directly.
   * In the "A[i] * B[j]" comments below, b[j] is the value loaded into %rax
   * and a[i] is the memory operand of mulq.
  41795. */
  41796. #ifndef __APPLE__
  41797. .text
  41798. .globl sp_384_mul_6
  41799. .type sp_384_mul_6,@function
  41800. .align 16
  41801. sp_384_mul_6:
  41802. #else
  41803. .section __TEXT,__text
  41804. .globl _sp_384_mul_6
  41805. .p2align 4
  41806. _sp_384_mul_6:
  41807. #endif /* __APPLE__ */
  /* mulq writes its high half to %rdx, so the b pointer moves to %rcx. */
  41808. movq %rdx, %rcx
  41809. subq $48, %rsp
  /* Column 0 -> (%rsp) */
  41810. # A[0] * B[0]
  41811. movq (%rcx), %rax
  41812. mulq (%rsi)
  41813. xorq %r10, %r10
  41814. movq %rax, (%rsp)
  41815. movq %rdx, %r9
  /* Column 1 -> 8(%rsp); accumulator r9 (low), r10, r8 (overflow). */
  41816. # A[0] * B[1]
  41817. movq 8(%rcx), %rax
  41818. mulq (%rsi)
  41819. xorq %r8, %r8
  41820. addq %rax, %r9
  41821. adcq %rdx, %r10
  41822. adcq $0x00, %r8
  41823. # A[1] * B[0]
  41824. movq (%rcx), %rax
  41825. mulq 8(%rsi)
  41826. addq %rax, %r9
  41827. adcq %rdx, %r10
  41828. adcq $0x00, %r8
  41829. movq %r9, 8(%rsp)
  /* Column 2 -> 16(%rsp); accumulator r10, r8, r9. */
  41830. # A[0] * B[2]
  41831. movq 16(%rcx), %rax
  41832. mulq (%rsi)
  41833. xorq %r9, %r9
  41834. addq %rax, %r10
  41835. adcq %rdx, %r8
  41836. adcq $0x00, %r9
  41837. # A[1] * B[1]
  41838. movq 8(%rcx), %rax
  41839. mulq 8(%rsi)
  41840. addq %rax, %r10
  41841. adcq %rdx, %r8
  41842. adcq $0x00, %r9
  41843. # A[2] * B[0]
  41844. movq (%rcx), %rax
  41845. mulq 16(%rsi)
  41846. addq %rax, %r10
  41847. adcq %rdx, %r8
  41848. adcq $0x00, %r9
  41849. movq %r10, 16(%rsp)
  /* Column 3 -> 24(%rsp); accumulator r8, r9, r10. */
  41850. # A[0] * B[3]
  41851. movq 24(%rcx), %rax
  41852. mulq (%rsi)
  41853. xorq %r10, %r10
  41854. addq %rax, %r8
  41855. adcq %rdx, %r9
  41856. adcq $0x00, %r10
  41857. # A[1] * B[2]
  41858. movq 16(%rcx), %rax
  41859. mulq 8(%rsi)
  41860. addq %rax, %r8
  41861. adcq %rdx, %r9
  41862. adcq $0x00, %r10
  41863. # A[2] * B[1]
  41864. movq 8(%rcx), %rax
  41865. mulq 16(%rsi)
  41866. addq %rax, %r8
  41867. adcq %rdx, %r9
  41868. adcq $0x00, %r10
  41869. # A[3] * B[0]
  41870. movq (%rcx), %rax
  41871. mulq 24(%rsi)
  41872. addq %rax, %r8
  41873. adcq %rdx, %r9
  41874. adcq $0x00, %r10
  41875. movq %r8, 24(%rsp)
  /* Column 4 -> 32(%rsp); accumulator r9, r10, r8. */
  41876. # A[0] * B[4]
  41877. movq 32(%rcx), %rax
  41878. mulq (%rsi)
  41879. xorq %r8, %r8
  41880. addq %rax, %r9
  41881. adcq %rdx, %r10
  41882. adcq $0x00, %r8
  41883. # A[1] * B[3]
  41884. movq 24(%rcx), %rax
  41885. mulq 8(%rsi)
  41886. addq %rax, %r9
  41887. adcq %rdx, %r10
  41888. adcq $0x00, %r8
  41889. # A[2] * B[2]
  41890. movq 16(%rcx), %rax
  41891. mulq 16(%rsi)
  41892. addq %rax, %r9
  41893. adcq %rdx, %r10
  41894. adcq $0x00, %r8
  41895. # A[3] * B[1]
  41896. movq 8(%rcx), %rax
  41897. mulq 24(%rsi)
  41898. addq %rax, %r9
  41899. adcq %rdx, %r10
  41900. adcq $0x00, %r8
  41901. # A[4] * B[0]
  41902. movq (%rcx), %rax
  41903. mulq 32(%rsi)
  41904. addq %rax, %r9
  41905. adcq %rdx, %r10
  41906. adcq $0x00, %r8
  41907. movq %r9, 32(%rsp)
  /* Column 5 -> 40(%rsp); accumulator r10, r8, r9. */
  41908. # A[0] * B[5]
  41909. movq 40(%rcx), %rax
  41910. mulq (%rsi)
  41911. xorq %r9, %r9
  41912. addq %rax, %r10
  41913. adcq %rdx, %r8
  41914. adcq $0x00, %r9
  41915. # A[1] * B[4]
  41916. movq 32(%rcx), %rax
  41917. mulq 8(%rsi)
  41918. addq %rax, %r10
  41919. adcq %rdx, %r8
  41920. adcq $0x00, %r9
  41921. # A[2] * B[3]
  41922. movq 24(%rcx), %rax
  41923. mulq 16(%rsi)
  41924. addq %rax, %r10
  41925. adcq %rdx, %r8
  41926. adcq $0x00, %r9
  41927. # A[3] * B[2]
  41928. movq 16(%rcx), %rax
  41929. mulq 24(%rsi)
  41930. addq %rax, %r10
  41931. adcq %rdx, %r8
  41932. adcq $0x00, %r9
  41933. # A[4] * B[1]
  41934. movq 8(%rcx), %rax
  41935. mulq 32(%rsi)
  41936. addq %rax, %r10
  41937. adcq %rdx, %r8
  41938. adcq $0x00, %r9
  41939. # A[5] * B[0]
  41940. movq (%rcx), %rax
  41941. mulq 40(%rsi)
  41942. addq %rax, %r10
  41943. adcq %rdx, %r8
  41944. adcq $0x00, %r9
  41945. movq %r10, 40(%rsp)
  /* Column 6 -> 48(%rdi); from here on results go straight to r. */
  41946. # A[1] * B[5]
  41947. movq 40(%rcx), %rax
  41948. mulq 8(%rsi)
  41949. xorq %r10, %r10
  41950. addq %rax, %r8
  41951. adcq %rdx, %r9
  41952. adcq $0x00, %r10
  41953. # A[2] * B[4]
  41954. movq 32(%rcx), %rax
  41955. mulq 16(%rsi)
  41956. addq %rax, %r8
  41957. adcq %rdx, %r9
  41958. adcq $0x00, %r10
  41959. # A[3] * B[3]
  41960. movq 24(%rcx), %rax
  41961. mulq 24(%rsi)
  41962. addq %rax, %r8
  41963. adcq %rdx, %r9
  41964. adcq $0x00, %r10
  41965. # A[4] * B[2]
  41966. movq 16(%rcx), %rax
  41967. mulq 32(%rsi)
  41968. addq %rax, %r8
  41969. adcq %rdx, %r9
  41970. adcq $0x00, %r10
  41971. # A[5] * B[1]
  41972. movq 8(%rcx), %rax
  41973. mulq 40(%rsi)
  41974. addq %rax, %r8
  41975. adcq %rdx, %r9
  41976. adcq $0x00, %r10
  41977. movq %r8, 48(%rdi)
  /* Column 7 -> 56(%rdi) */
  41978. # A[2] * B[5]
  41979. movq 40(%rcx), %rax
  41980. mulq 16(%rsi)
  41981. xorq %r8, %r8
  41982. addq %rax, %r9
  41983. adcq %rdx, %r10
  41984. adcq $0x00, %r8
  41985. # A[3] * B[4]
  41986. movq 32(%rcx), %rax
  41987. mulq 24(%rsi)
  41988. addq %rax, %r9
  41989. adcq %rdx, %r10
  41990. adcq $0x00, %r8
  41991. # A[4] * B[3]
  41992. movq 24(%rcx), %rax
  41993. mulq 32(%rsi)
  41994. addq %rax, %r9
  41995. adcq %rdx, %r10
  41996. adcq $0x00, %r8
  41997. # A[5] * B[2]
  41998. movq 16(%rcx), %rax
  41999. mulq 40(%rsi)
  42000. addq %rax, %r9
  42001. adcq %rdx, %r10
  42002. adcq $0x00, %r8
  42003. movq %r9, 56(%rdi)
  /* Column 8 -> 64(%rdi) */
  42004. # A[3] * B[5]
  42005. movq 40(%rcx), %rax
  42006. mulq 24(%rsi)
  42007. xorq %r9, %r9
  42008. addq %rax, %r10
  42009. adcq %rdx, %r8
  42010. adcq $0x00, %r9
  42011. # A[4] * B[4]
  42012. movq 32(%rcx), %rax
  42013. mulq 32(%rsi)
  42014. addq %rax, %r10
  42015. adcq %rdx, %r8
  42016. adcq $0x00, %r9
  42017. # A[5] * B[3]
  42018. movq 24(%rcx), %rax
  42019. mulq 40(%rsi)
  42020. addq %rax, %r10
  42021. adcq %rdx, %r8
  42022. adcq $0x00, %r9
  42023. movq %r10, 64(%rdi)
  /* Column 9 -> 72(%rdi) */
  42024. # A[4] * B[5]
  42025. movq 40(%rcx), %rax
  42026. mulq 32(%rsi)
  42027. xorq %r10, %r10
  42028. addq %rax, %r8
  42029. adcq %rdx, %r9
  42030. adcq $0x00, %r10
  42031. # A[5] * B[4]
  42032. movq 32(%rcx), %rax
  42033. mulq 40(%rsi)
  42034. addq %rax, %r8
  42035. adcq %rdx, %r9
  42036. adcq $0x00, %r10
  42037. movq %r8, 72(%rdi)
  /* Columns 10 and 11 -> 80(%rdi), 88(%rdi) */
  42038. # A[5] * B[5]
  42039. movq 40(%rcx), %rax
  42040. mulq 40(%rsi)
  42041. addq %rax, %r9
  42042. adcq %rdx, %r10
  42043. movq %r9, 80(%rdi)
  42044. movq %r10, 88(%rdi)
  /* Copy the staged low six limbs from the stack to r. */
  42045. movq (%rsp), %rax
  42046. movq 8(%rsp), %rdx
  42047. movq 16(%rsp), %r8
  42048. movq 24(%rsp), %r9
  42049. movq %rax, (%rdi)
  42050. movq %rdx, 8(%rdi)
  42051. movq %r8, 16(%rdi)
  42052. movq %r9, 24(%rdi)
  42053. movq 32(%rsp), %rax
  42054. movq 40(%rsp), %rdx
  42055. movq %rax, 32(%rdi)
  42056. movq %rdx, 40(%rdi)
  42057. addq $48, %rsp
  42058. repz retq
  42059. #ifndef __APPLE__
  42060. .size sp_384_mul_6,.-sp_384_mul_6
  42061. #endif /* __APPLE__ */
  /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
   * m is -1 to subtract and 0 when not subtracting.
   *
   * r (%rdi) A single precision number receiving the result (six limbs).
   * a (%rsi) A single precision number to subtract from.
   * b (%rdx) A single precision number to subtract.
   * m (%rcx) Mask value to apply to every limb of b.
   *
   * Returns in %rax the final borrow as a mask: 0 when no borrow occurred,
   * -1 when the subtraction borrowed out of the top limb.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_cond_sub_6
  .type sp_384_cond_sub_6,@function
  .align 16
  sp_384_cond_sub_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_cond_sub_6
  .p2align 4
  _sp_384_cond_sub_6:
  #endif /* __APPLE__ */
      subq $48, %rsp
      /* Stage b & m on the stack, two limbs at a time, before r is touched. */
      movq (%rdx), %r10
      movq 8(%rdx), %r11
      andq %rcx, %r10
      andq %rcx, %r11
      movq %r10, (%rsp)
      movq %r11, 8(%rsp)
      movq 16(%rdx), %r10
      movq 24(%rdx), %r11
      andq %rcx, %r10
      andq %rcx, %r11
      movq %r10, 16(%rsp)
      movq %r11, 24(%rsp)
      movq 32(%rdx), %r10
      movq 40(%rdx), %r11
      andq %rcx, %r10
      andq %rcx, %r11
      movq %r10, 32(%rsp)
      movq %r11, 40(%rsp)
      /* Borrow chain: r[i] = a[i] - staged[i] - borrow.  Each store trails */
      /* the next load by one limb; movq leaves the flags untouched.        */
      movq (%rsi), %r10
      subq (%rsp), %r10
      movq 8(%rsi), %r11
      sbbq 8(%rsp), %r11
      movq %r10, (%rdi)
      movq 16(%rsi), %r10
      sbbq 16(%rsp), %r10
      movq %r11, 8(%rdi)
      movq 24(%rsi), %r11
      sbbq 24(%rsp), %r11
      movq %r10, 16(%rdi)
      movq 32(%rsi), %r10
      sbbq 32(%rsp), %r10
      movq %r11, 24(%rdi)
      movq 40(%rsi), %r11
      sbbq 40(%rsp), %r11
      movq %r10, 32(%rdi)
      movq %r11, 40(%rdi)
      /* rax = 0 - borrow: 0 or -1, whatever rax held before. */
      sbbq %rax, %rax
      addq $48, %rsp
      repz retq
  #ifndef __APPLE__
  .size sp_384_cond_sub_6,.-sp_384_cond_sub_6
  #endif /* __APPLE__ */
  42132. #ifdef HAVE_INTEL_AVX2
  42133. /* Reduce the number back to 384 bits using Montgomery reduction.
  42134. *
  42135. * a A single precision number to reduce in place.
  42136. * m The single precision number representing the modulus.
  42137. * mp The digit representing the negative inverse of m mod 2^n.
  *
  * Notes on this implementation:
  * - m and mp are accepted but never read: %rsi and %rdx are overwritten
  *   before any use, and the modulus is built from immediates and 32-bit
  *   shifts instead.
  * - a is read as 12 words (byte offsets 0..88) and the 6-word result is
  *   stored back to byte offsets 0..40.
  * - Three rounds each process two words; the working words rotate through
  *   %r11-%r15, %rsi, %rbx and %rbp, and %r10 holds the carry between rounds.
  * - The final masked subtraction uses the words 0x00000000ffffffff,
  *   0xffffffff00000000, 0xfffffffffffffffe and three all-ones words, i.e.
  *   2^384 - 2^128 - 2^96 + 2^32 - 1.
  42138. */
  42139. #ifndef __APPLE__
  42140. .text
  42141. .globl sp_384_mont_reduce_6
  42142. .type sp_384_mont_reduce_6,@function
  42143. .align 16
  42144. sp_384_mont_reduce_6:
  42145. #else
  42146. .section __TEXT,__text
  42147. .globl _sp_384_mont_reduce_6
  42148. .p2align 4
  42149. _sp_384_mont_reduce_6:
  42150. #endif /* __APPLE__ */
  # Save the callee-saved registers that hold working words below.
  42151. pushq %r12
  42152. pushq %r13
  42153. pushq %r14
  42154. pushq %r15
  42155. pushq %rbx
  42156. pushq %rbp
  # Load a[0..5]. %rsi (the m argument) is overwritten with a[5] here.
  42157. movq (%rdi), %r11
  42158. movq 8(%rdi), %r12
  42159. movq 16(%rdi), %r13
  42160. movq 24(%rdi), %r14
  42161. movq 32(%rdi), %r15
  42162. movq 40(%rdi), %rsi
  # %r10 collects the carry out of each round.
  42163. xorq %r10, %r10
  42164. # a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)
  42165. movq 48(%rdi), %rbx
  42166. movq 56(%rdi), %rbp
  # %rax:%rdx = (a[1]:a[0]) * (2^64 + 2^32 + 1) mod 2^128, from shifts and adds.
  42167. movq %r11, %rdx
  42168. movq %r12, %rax
  42169. shldq $32, %rdx, %rax
  42170. shlq $32, %rdx
  42171. addq %r11, %rdx
  42172. adcq %r12, %rax
  42173. addq %r11, %rax
  # %r9:%r8:%rcx = (%rax:%rdx) << 32
  42174. movq %rdx, %rcx
  42175. movq %rax, %r8
  42176. movq %rax, %r9
  42177. shldq $32, %rcx, %r8
  42178. shlq $32, %rcx
  42179. shrq $32, %r9
  # Add the shifted value at word 0 and the unshifted %rax:%rdx at words 6..7.
  # The carry out of word 7 lands in %r10.
  42180. addq %rcx, %r11
  42181. adcq %r8, %r12
  42182. adcq %r9, %r13
  42183. adcq $0x00, %r14
  42184. adcq $0x00, %r15
  42185. adcq $0x00, %rsi
  42186. adcq %rdx, %rbx
  42187. adcq %rax, %rbp
  42188. adcq $0x00, %r10
  # Extend the shifted value by adding the words (%rax, %rdx, %rax); the new
  # top word is rebuilt in %rax. movq is used to clear %rax so CF survives.
  42189. addq %rax, %rcx
  42190. adcq %rdx, %r8
  42191. adcq %rax, %r9
  42192. movq $0x00, %rax
  42193. adcq $0x00, %rax
  # Subtract words 1..3 of that sum starting at word 2; the borrow ripples
  # up through word 7 and into %r10.
  42194. subq %r8, %r13
  42195. sbbq %r9, %r14
  42196. sbbq %rax, %r15
  42197. sbbq $0x00, %rsi
  42198. sbbq $0x00, %rbx
  42199. sbbq $0x00, %rbp
  42200. sbbq $0x00, %r10
  42201. # a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)
  # %r11/%r12 are free now and are reloaded with a[8..9].
  42202. movq 64(%rdi), %r11
  42203. movq 72(%rdi), %r12
  # Same multiplier computation as in the first round, on words 2..3.
  42204. movq %r13, %rdx
  42205. movq %r14, %rax
  42206. shldq $32, %rdx, %rax
  42207. shlq $32, %rdx
  42208. addq %r13, %rdx
  42209. adcq %r14, %rax
  42210. addq %r13, %rax
  42211. movq %rdx, %rcx
  42212. movq %rax, %r8
  42213. movq %rax, %r9
  42214. shldq $32, %rcx, %r8
  42215. shlq $32, %rcx
  42216. shrq $32, %r9
  # Fold the carry saved from the previous round into a[8..9], then restart
  # %r10 with the carry out of that addition.
  42217. addq %r10, %r11
  42218. adcq $0x00, %r12
  42219. movq $0x00, %r10
  42220. adcq $0x00, %r10
  42221. addq %rcx, %r13
  42222. adcq %r8, %r14
  42223. adcq %r9, %r15
  42224. adcq $0x00, %rsi
  42225. adcq $0x00, %rbx
  42226. adcq $0x00, %rbp
  42227. adcq %rdx, %r11
  42228. adcq %rax, %r12
  42229. adcq $0x00, %r10
  42230. addq %rax, %rcx
  42231. adcq %rdx, %r8
  42232. adcq %rax, %r9
  42233. movq $0x00, %rax
  42234. adcq $0x00, %rax
  42235. subq %r8, %r15
  42236. sbbq %r9, %rsi
  42237. sbbq %rax, %rbx
  42238. sbbq $0x00, %rbp
  42239. sbbq $0x00, %r11
  42240. sbbq $0x00, %r12
  42241. sbbq $0x00, %r10
  42242. # a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)
  # %r13/%r14 are free now and are reloaded with a[10..11].
  42243. movq 80(%rdi), %r13
  42244. movq 88(%rdi), %r14
  # Same multiplier computation again, on words 4..5 (%r15 and %rsi).
  42245. movq %r15, %rdx
  42246. movq %rsi, %rax
  42247. shldq $32, %rdx, %rax
  42248. shlq $32, %rdx
  42249. addq %r15, %rdx
  42250. adcq %rsi, %rax
  42251. addq %r15, %rax
  42252. movq %rdx, %rcx
  42253. movq %rax, %r8
  42254. movq %rax, %r9
  42255. shldq $32, %rcx, %r8
  42256. shlq $32, %rcx
  42257. shrq $32, %r9
  42258. addq %r10, %r13
  42259. adcq $0x00, %r14
  42260. movq $0x00, %r10
  42261. adcq $0x00, %r10
  42262. addq %rcx, %r15
  42263. adcq %r8, %rsi
  42264. adcq %r9, %rbx
  42265. adcq $0x00, %rbp
  42266. adcq $0x00, %r11
  42267. adcq $0x00, %r12
  42268. adcq %rdx, %r13
  42269. adcq %rax, %r14
  42270. adcq $0x00, %r10
  42271. addq %rax, %rcx
  42272. adcq %rdx, %r8
  42273. adcq %rax, %r9
  42274. movq $0x00, %rax
  42275. adcq $0x00, %rax
  42276. subq %r8, %rbx
  42277. sbbq %r9, %rbp
  42278. sbbq %rax, %r11
  42279. sbbq $0x00, %r12
  42280. sbbq $0x00, %r13
  42281. sbbq $0x00, %r14
  42282. sbbq $0x00, %r10
  42283. # Subtract mod if carry
  # Negating %r10 gives the mask (1 becomes all ones); this assumes %r10 is
  # 0 or 1 at this point - TODO confirm.
  42284. negq %r10
  42285. movq $0xfffffffffffffffe, %r9
  # Masked modulus words: %rcx = mask >> 32, %r8 = mask << 32,
  # %r9 = mask & 0xfffffffffffffffe, and %r10 itself for the top three words.
  42286. movq %r10, %rcx
  42287. movq %r10, %r8
  42288. shrq $32, %rcx
  42289. shlq $32, %r8
  42290. andq %r10, %r9
  42291. subq %rcx, %rbx
  42292. sbbq %r8, %rbp
  42293. sbbq %r9, %r11
  42294. sbbq %r10, %r12
  42295. sbbq %r10, %r13
  42296. sbbq %r10, %r14
  # Store the 6-word result over a[0..5].
  42297. movq %rbx, (%rdi)
  42298. movq %rbp, 8(%rdi)
  42299. movq %r11, 16(%rdi)
  42300. movq %r12, 24(%rdi)
  42301. movq %r13, 32(%rdi)
  42302. movq %r14, 40(%rdi)
  42303. popq %rbp
  42304. popq %rbx
  42305. popq %r15
  42306. popq %r14
  42307. popq %r13
  42308. popq %r12
  42309. repz retq
  42310. #ifndef __APPLE__
  42311. .size sp_384_mont_reduce_6,.-sp_384_mont_reduce_6
  42312. #endif /* __APPLE__ */
  42313. #endif /* HAVE_INTEL_AVX2 */
  /* Reduce the number back to 384 bits using Montgomery reduction.
   *
   * a   A single precision number to reduce in place.
   * m   The single precision number representing the modulus.
   * mp  The digit representing the negative inverse of m mod 2^n.
   *
   * Six rounds are run. Round i computes mu = a[i] * mp and adds m * mu at
   * word i. The two lowest live words are carried in %r13/%r14 between
   * rounds, words 3..6 of the window are updated in memory, and %r15 holds
   * the carry out of the top word. After the loop sp_384_cond_sub_6 is
   * called with r = the original a, a = a + 6 words, b = m and
   * mask = -(final carry).
   *
   * The frame is padded by 8 bytes so that %rsp is 16-byte aligned at the
   * call, as the System V AMD64 ABI requires (four pushes alone leave it at
   * 8 mod 16).
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_mont_reduce_order_6
  .type sp_384_mont_reduce_order_6,@function
  .align 16
  sp_384_mont_reduce_order_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_mont_reduce_order_6
  .p2align 4
  _sp_384_mont_reduce_order_6:
  #endif /* __APPLE__ */
          pushq   %r12
          pushq   %r13
          pushq   %r14
          pushq   %r15
          # Alignment padding for the call at the end of the function.
          subq    $8, %rsp
          # mulq clobbers %rdx, so mp is kept in %rcx.
          movq    %rdx, %rcx
          # Carry out of the top word, threaded through the rounds.
          xorq    %r15, %r15
          # i = 6
          movq    $6, %r8
          movq    (%rdi), %r13
          movq    8(%rdi), %r14
  L_mont_loop_order_6:
          # mu = a[i] * mp
          movq    %r13, %r11
          imulq   %rcx, %r11
          # a[i+0] += m[0] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    (%rsi)
          addq    %rax, %r13
          adcq    %rdx, %r10
          # a[i+1] += m[1] * mu
          # The new word is built in %r13 and becomes a[i] of the next round.
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    8(%rsi)
          movq    %r14, %r13
          addq    %rax, %r13
          adcq    %rdx, %r9
          addq    %r10, %r13
          adcq    $0x00, %r9
          # a[i+2] += m[2] * mu
          # The new word is built in %r14 and becomes a[i+1] of the next round.
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    16(%rsi)
          movq    16(%rdi), %r14
          addq    %rax, %r14
          adcq    %rdx, %r10
          addq    %r9, %r14
          adcq    $0x00, %r10
          # a[i+3] += m[3] * mu
          movq    %r11, %rax
          xorq    %r9, %r9
          mulq    24(%rsi)
          movq    24(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r9
          addq    %r10, %r12
          movq    %r12, 24(%rdi)
          adcq    $0x00, %r9
          # a[i+4] += m[4] * mu
          movq    %r11, %rax
          xorq    %r10, %r10
          mulq    32(%rsi)
          movq    32(%rdi), %r12
          addq    %rax, %r12
          adcq    %rdx, %r10
          addq    %r9, %r12
          movq    %r12, 32(%rdi)
          adcq    $0x00, %r10
          # a[i+5] += m[5] * mu
          # The carry of the previous round (%r15) is folded into the high
          # product word. %r15 is cleared with movq so that CF stays live.
          movq    %r11, %rax
          mulq    40(%rsi)
          movq    40(%rdi), %r12
          addq    %rax, %r10
          adcq    %r15, %rdx
          movq    $0x00, %r15
          adcq    $0x00, %r15
          addq    %r10, %r12
          movq    %r12, 40(%rdi)
          adcq    %rdx, 48(%rdi)
          adcq    $0x00, %r15
          # i -= 1
          addq    $8, %rdi
          decq    %r8
          jnz     L_mont_loop_order_6
          # Write back the two words that were carried in registers.
          movq    %r13, (%rdi)
          movq    %r14, 8(%rdi)
          # Arguments for sp_384_cond_sub_6: mask, b = m, a = upper half,
          # r = start of the original buffer (48 bytes below the upper half).
          negq    %r15
          movq    %r15, %rcx
          movq    %rsi, %rdx
          movq    %rdi, %rsi
          subq    $48, %rdi
  #ifndef __APPLE__
          callq   sp_384_cond_sub_6@plt
  #else
          callq   _sp_384_cond_sub_6
  #endif /* __APPLE__ */
          addq    $8, %rsp
          popq    %r15
          popq    %r14
          popq    %r13
          popq    %r12
          repz retq
  #ifndef __APPLE__
  .size sp_384_mont_reduce_order_6,.-sp_384_mont_reduce_order_6
  #endif /* __APPLE__ */
  42427. /* Square a and put result in r. (r = a * a)
  42428. *
  42429. * r A single precision integer.
  42430. * a A single precision integer.
  *
  * Implementation notes:
  * - a is read as 6 words and r receives 12 words.
  * - The result is produced one column (result word) at a time with a
  *   rotating three-word accumulator held in %r8, %r9 and %rcx. Every
  *   off-diagonal product A[i] * A[j] is added twice, diagonal products once.
  * - Result words 0..5 are staged in a 48-byte stack buffer and copied to r
  *   only after the last read of a; words 6..11 are written to r directly.
  42431. */
  42432. #ifndef __APPLE__
  42433. .text
  42434. .globl sp_384_sqr_6
  42435. .type sp_384_sqr_6,@function
  42436. .align 16
  42437. sp_384_sqr_6:
  42438. #else
  42439. .section __TEXT,__text
  42440. .globl _sp_384_sqr_6
  42441. .p2align 4
  42442. _sp_384_sqr_6:
  42443. #endif /* __APPLE__ */
  # %r12 is callee-saved; it is used as scratch for result word 5 below.
  42444. pushq %r12
  42445. subq $48, %rsp
  42446. # A[0] * A[0]
  42447. movq (%rsi), %rax
  42448. mulq %rax
  42449. xorq %r9, %r9
  42450. movq %rax, (%rsp)
  42451. movq %rdx, %r8
  42452. # A[0] * A[1]
  42453. movq 8(%rsi), %rax
  42454. mulq (%rsi)
  42455. xorq %rcx, %rcx
  42456. addq %rax, %r8
  42457. adcq %rdx, %r9
  42458. adcq $0x00, %rcx
  42459. addq %rax, %r8
  42460. adcq %rdx, %r9
  42461. adcq $0x00, %rcx
  42462. movq %r8, 8(%rsp)
  42463. # A[0] * A[2]
  42464. movq 16(%rsi), %rax
  42465. mulq (%rsi)
  42466. xorq %r8, %r8
  42467. addq %rax, %r9
  42468. adcq %rdx, %rcx
  42469. adcq $0x00, %r8
  42470. addq %rax, %r9
  42471. adcq %rdx, %rcx
  42472. adcq $0x00, %r8
  42473. # A[1] * A[1]
  42474. movq 8(%rsi), %rax
  42475. mulq %rax
  42476. addq %rax, %r9
  42477. adcq %rdx, %rcx
  42478. adcq $0x00, %r8
  42479. movq %r9, 16(%rsp)
  42480. # A[0] * A[3]
  42481. movq 24(%rsi), %rax
  42482. mulq (%rsi)
  42483. xorq %r9, %r9
  42484. addq %rax, %rcx
  42485. adcq %rdx, %r8
  42486. adcq $0x00, %r9
  42487. addq %rax, %rcx
  42488. adcq %rdx, %r8
  42489. adcq $0x00, %r9
  42490. # A[1] * A[2]
  42491. movq 16(%rsi), %rax
  42492. mulq 8(%rsi)
  42493. addq %rax, %rcx
  42494. adcq %rdx, %r8
  42495. adcq $0x00, %r9
  42496. addq %rax, %rcx
  42497. adcq %rdx, %r8
  42498. adcq $0x00, %r9
  42499. movq %rcx, 24(%rsp)
  42500. # A[0] * A[4]
  42501. movq 32(%rsi), %rax
  42502. mulq (%rsi)
  42503. xorq %rcx, %rcx
  42504. addq %rax, %r8
  42505. adcq %rdx, %r9
  42506. adcq $0x00, %rcx
  42507. addq %rax, %r8
  42508. adcq %rdx, %r9
  42509. adcq $0x00, %rcx
  42510. # A[1] * A[3]
  42511. movq 24(%rsi), %rax
  42512. mulq 8(%rsi)
  42513. addq %rax, %r8
  42514. adcq %rdx, %r9
  42515. adcq $0x00, %rcx
  42516. addq %rax, %r8
  42517. adcq %rdx, %r9
  42518. adcq $0x00, %rcx
  42519. # A[2] * A[2]
  42520. movq 16(%rsi), %rax
  42521. mulq %rax
  42522. addq %rax, %r8
  42523. adcq %rdx, %r9
  42524. adcq $0x00, %rcx
  42525. movq %r8, 32(%rsp)
  # Result word 5 takes a different route: its three cross products are
  # summed once in %r12:%r11:%r10, that sum is doubled, and the doubled
  # value is then added to the accumulator.
  42526. # A[0] * A[5]
  42527. movq 40(%rsi), %rax
  42528. mulq (%rsi)
  42529. xorq %r8, %r8
  42530. xorq %r12, %r12
  42531. movq %rax, %r10
  42532. movq %rdx, %r11
  42533. # A[1] * A[4]
  42534. movq 32(%rsi), %rax
  42535. mulq 8(%rsi)
  42536. addq %rax, %r10
  42537. adcq %rdx, %r11
  42538. adcq $0x00, %r12
  42539. # A[2] * A[3]
  42540. movq 24(%rsi), %rax
  42541. mulq 16(%rsi)
  42542. addq %rax, %r10
  42543. adcq %rdx, %r11
  42544. adcq $0x00, %r12
  # Double the three-word sum, then add it into %r8:%rcx:%r9.
  42545. addq %r10, %r10
  42546. adcq %r11, %r11
  42547. adcq %r12, %r12
  42548. addq %r10, %r9
  42549. adcq %r11, %rcx
  42550. adcq %r12, %r8
  42551. movq %r9, 40(%rsp)
  # From here on the result words go straight to r (byte offsets 48..88).
  42552. # A[1] * A[5]
  42553. movq 40(%rsi), %rax
  42554. mulq 8(%rsi)
  42555. xorq %r9, %r9
  42556. addq %rax, %rcx
  42557. adcq %rdx, %r8
  42558. adcq $0x00, %r9
  42559. addq %rax, %rcx
  42560. adcq %rdx, %r8
  42561. adcq $0x00, %r9
  42562. # A[2] * A[4]
  42563. movq 32(%rsi), %rax
  42564. mulq 16(%rsi)
  42565. addq %rax, %rcx
  42566. adcq %rdx, %r8
  42567. adcq $0x00, %r9
  42568. addq %rax, %rcx
  42569. adcq %rdx, %r8
  42570. adcq $0x00, %r9
  42571. # A[3] * A[3]
  42572. movq 24(%rsi), %rax
  42573. mulq %rax
  42574. addq %rax, %rcx
  42575. adcq %rdx, %r8
  42576. adcq $0x00, %r9
  42577. movq %rcx, 48(%rdi)
  42578. # A[2] * A[5]
  42579. movq 40(%rsi), %rax
  42580. mulq 16(%rsi)
  42581. xorq %rcx, %rcx
  42582. addq %rax, %r8
  42583. adcq %rdx, %r9
  42584. adcq $0x00, %rcx
  42585. addq %rax, %r8
  42586. adcq %rdx, %r9
  42587. adcq $0x00, %rcx
  42588. # A[3] * A[4]
  42589. movq 32(%rsi), %rax
  42590. mulq 24(%rsi)
  42591. addq %rax, %r8
  42592. adcq %rdx, %r9
  42593. adcq $0x00, %rcx
  42594. addq %rax, %r8
  42595. adcq %rdx, %r9
  42596. adcq $0x00, %rcx
  42597. movq %r8, 56(%rdi)
  42598. # A[3] * A[5]
  42599. movq 40(%rsi), %rax
  42600. mulq 24(%rsi)
  42601. xorq %r8, %r8
  42602. addq %rax, %r9
  42603. adcq %rdx, %rcx
  42604. adcq $0x00, %r8
  42605. addq %rax, %r9
  42606. adcq %rdx, %rcx
  42607. adcq $0x00, %r8
  42608. # A[4] * A[4]
  42609. movq 32(%rsi), %rax
  42610. mulq %rax
  42611. addq %rax, %r9
  42612. adcq %rdx, %rcx
  42613. adcq $0x00, %r8
  42614. movq %r9, 64(%rdi)
  42615. # A[4] * A[5]
  42616. movq 40(%rsi), %rax
  42617. mulq 32(%rsi)
  42618. xorq %r9, %r9
  42619. addq %rax, %rcx
  42620. adcq %rdx, %r8
  42621. adcq $0x00, %r9
  42622. addq %rax, %rcx
  42623. adcq %rdx, %r8
  42624. adcq $0x00, %r9
  42625. movq %rcx, 72(%rdi)
  42626. # A[5] * A[5]
  42627. movq 40(%rsi), %rax
  42628. mulq %rax
  42629. addq %rax, %r8
  42630. adcq %rdx, %r9
  42631. movq %r8, 80(%rdi)
  42632. movq %r9, 88(%rdi)
  # Copy the staged result words 0..5 from the stack buffer to r.
  42633. movq (%rsp), %rax
  42634. movq 8(%rsp), %rdx
  42635. movq 16(%rsp), %r10
  42636. movq 24(%rsp), %r11
  42637. movq %rax, (%rdi)
  42638. movq %rdx, 8(%rdi)
  42639. movq %r10, 16(%rdi)
  42640. movq %r11, 24(%rdi)
  42641. movq 32(%rsp), %rax
  42642. movq 40(%rsp), %rdx
  42643. movq %rax, 32(%rdi)
  42644. movq %rdx, 40(%rdi)
  42645. addq $48, %rsp
  42646. popq %r12
  42647. repz retq
  42648. #ifndef __APPLE__
  42649. .size sp_384_sqr_6,.-sp_384_sqr_6
  42650. #endif /* __APPLE__ */
  /* Compare a with b in constant time.
   *
   * a  A single precision integer.
   * b  A single precision integer.
   * return  -ve, 0 or +ve if a is less than, equal to or greater than b
   *         respectively.
   *
   * All six word pairs are always loaded. Once a difference has been seen
   * the mask in %rdx becomes zero, so the remaining words compare as equal.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_cmp_6
  .type sp_384_cmp_6,@function
  .align 16
  sp_384_cmp_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_cmp_6
  .p2align 4
  _sp_384_cmp_6:
  #endif /* __APPLE__ */
          # %rcx = 0 and %r8 = +1 are constants for the conditional moves,
          # %rdx = mask (all ones while the words seen so far are equal),
          # %rax = running result.
          xorl    %ecx, %ecx
          movq    $-1, %rdx
          movq    $-1, %rax
          movl    $0x01, %r8d
          # Word 5 (most significant)
          movq    %rdx, %r9
          movq    %rdx, %r10
          andq    40(%rdi), %r9
          andq    40(%rsi), %r10
          subq    %r10, %r9
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          cmovnz  %rcx, %rdx
          # Word 4
          movq    %rdx, %r9
          movq    %rdx, %r10
          andq    32(%rdi), %r9
          andq    32(%rsi), %r10
          subq    %r10, %r9
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          cmovnz  %rcx, %rdx
          # Word 3
          movq    %rdx, %r9
          movq    %rdx, %r10
          andq    24(%rdi), %r9
          andq    24(%rsi), %r10
          subq    %r10, %r9
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          cmovnz  %rcx, %rdx
          # Word 2
          movq    %rdx, %r9
          movq    %rdx, %r10
          andq    16(%rdi), %r9
          andq    16(%rsi), %r10
          subq    %r10, %r9
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          cmovnz  %rcx, %rdx
          # Word 1
          movq    %rdx, %r9
          movq    %rdx, %r10
          andq    8(%rdi), %r9
          andq    8(%rsi), %r10
          subq    %r10, %r9
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          cmovnz  %rcx, %rdx
          # Word 0 (least significant)
          movq    %rdx, %r9
          movq    %rdx, %r10
          andq    (%rdi), %r9
          andq    (%rsi), %r10
          subq    %r10, %r9
          cmova   %r8, %rax
          cmovc   %rdx, %rax
          cmovnz  %rcx, %rdx
          # A mask that is still all ones means every word matched; the xor
          # turns the initial -1 into 0 and otherwise leaves the result alone.
          xorq    %rdx, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_384_cmp_6,.-sp_384_cmp_6
  #endif /* __APPLE__ */
  /* Add b to a into r. (r = a + b)
   *
   * r  A single precision integer.
   * a  A single precision integer.
   * b  A single precision integer.
   *
   * The carry out of the top word (0 or 1) is left in %rax.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_add_6
  .type sp_384_add_6,@function
  .align 16
  sp_384_add_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_add_6
  .p2align 4
  _sp_384_add_6:
  #endif /* __APPLE__ */
          # Clear the carry accumulator before the flag chain starts.
          xorl    %eax, %eax
          # Two temporaries alternate so that the load of a[i+1] is issued
          # before the store of r[i]; movq leaves CF untouched.
          movq    (%rsi), %r8
          addq    (%rdx), %r8
          movq    8(%rsi), %rcx
          movq    %r8, (%rdi)
          adcq    8(%rdx), %rcx
          movq    16(%rsi), %r8
          movq    %rcx, 8(%rdi)
          adcq    16(%rdx), %r8
          movq    24(%rsi), %rcx
          movq    %r8, 16(%rdi)
          adcq    24(%rdx), %rcx
          movq    32(%rsi), %r8
          movq    %rcx, 24(%rdi)
          adcq    32(%rdx), %r8
          movq    40(%rsi), %rcx
          movq    %r8, 32(%rdi)
          adcq    40(%rdx), %rcx
          movq    %rcx, 40(%rdi)
          # Return the final carry.
          adcq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_384_add_6,.-sp_384_add_6
  #endif /* __APPLE__ */
  /* Add a to a into r. (r = a + a)
   *
   * r  A single precision integer.
   * a  A single precision integer.
   *
   * The carry out of the top word (0 or 1) is left in %rax.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_dbl_6
  .type sp_384_dbl_6,@function
  .align 16
  sp_384_dbl_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_dbl_6
  .p2align 4
  _sp_384_dbl_6:
  #endif /* __APPLE__ */
          # Clear the carry accumulator before the flag chain starts.
          xorl    %eax, %eax
          # Each word is added to itself; two temporaries alternate so that
          # the load of a[i+1] is issued before the store of r[i].
          movq    (%rsi), %rcx
          addq    %rcx, %rcx
          movq    8(%rsi), %rdx
          movq    %rcx, (%rdi)
          adcq    %rdx, %rdx
          movq    16(%rsi), %rcx
          movq    %rdx, 8(%rdi)
          adcq    %rcx, %rcx
          movq    24(%rsi), %rdx
          movq    %rcx, 16(%rdi)
          adcq    %rdx, %rdx
          movq    32(%rsi), %rcx
          movq    %rdx, 24(%rdi)
          adcq    %rcx, %rcx
          movq    40(%rsi), %rdx
          movq    %rcx, 32(%rdi)
          adcq    %rdx, %rdx
          movq    %rdx, 40(%rdi)
          # Return the final carry.
          adcq    $0x00, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_384_dbl_6,.-sp_384_dbl_6
  #endif /* __APPLE__ */
  /* Sub b from a into r. (r = a - b)
   *
   * r  A single precision integer.
   * a  A single precision integer.
   * b  A single precision integer.
   *
   * %rax is 0 on return when the subtraction does not borrow out of the top
   * word and -1 when it does.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_sub_6
  .type sp_384_sub_6,@function
  .align 16
  sp_384_sub_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_sub_6
  .p2align 4
  _sp_384_sub_6:
  #endif /* __APPLE__ */
          # Load all of a first; %rax doubles as the temporary for a[5].
          movq    (%rsi), %rcx
          movq    8(%rsi), %r8
          movq    16(%rsi), %r9
          movq    24(%rsi), %r10
          movq    32(%rsi), %r11
          movq    40(%rsi), %rax
          # Subtract b with the borrow chained through CF.
          subq    (%rdx), %rcx
          sbbq    8(%rdx), %r8
          sbbq    16(%rdx), %r9
          sbbq    24(%rdx), %r10
          sbbq    32(%rdx), %r11
          sbbq    40(%rdx), %rax
          # Stores do not touch the flags, so the borrow survives them.
          movq    %rcx, (%rdi)
          movq    %r8, 8(%rdi)
          movq    %r9, 16(%rdi)
          movq    %r10, 24(%rdi)
          movq    %r11, 32(%rdi)
          movq    %rax, 40(%rdi)
          # %rax - %rax - CF yields 0 or -1: the borrow as the return value.
          sbbq    %rax, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_384_sub_6,.-sp_384_sub_6
  #endif /* __APPLE__ */
  /* Conditionally add a and b using the mask m.
   * m is -1 to add and 0 when not.
   *
   * r  A single precision number representing conditional add result.
   * a  A single precision number to add with.
   * b  A single precision number to add.
   * m  Mask value to apply.
   *
   * The carry out of the top word (0 or 1) is left in %rax.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_cond_add_6
  .type sp_384_cond_add_6,@function
  .align 16
  sp_384_cond_add_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_cond_add_6
  .p2align 4
  _sp_384_cond_add_6:
  #endif /* __APPLE__ */
          subq    $48, %rsp
          # Return value starts at zero; the flags are not live yet.
          xorl    %eax, %eax
          # Stage (b & m) in the 48-byte stack buffer first: andq rewrites CF,
          # so the masking cannot be interleaved with the carry chain below.
          movq    (%rdx), %r8
          movq    8(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, (%rsp)
          movq    %r9, 8(%rsp)
          movq    16(%rdx), %r8
          movq    24(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 16(%rsp)
          movq    %r9, 24(%rsp)
          movq    32(%rdx), %r8
          movq    40(%rdx), %r9
          andq    %rcx, %r8
          andq    %rcx, %r9
          movq    %r8, 32(%rsp)
          movq    %r9, 40(%rsp)
          # r = a + (b & m), word by word with the carry held in CF.
          # Each store of r[i] is issued after the load of a[i+1].
          movq    (%rsi), %r8
          addq    (%rsp), %r8
          movq    8(%rsi), %r9
          adcq    8(%rsp), %r9
          movq    %r8, (%rdi)
          movq    16(%rsi), %r8
          adcq    16(%rsp), %r8
          movq    %r9, 8(%rdi)
          movq    24(%rsi), %r9
          adcq    24(%rsp), %r9
          movq    %r8, 16(%rdi)
          movq    32(%rsi), %r8
          adcq    32(%rsp), %r8
          movq    %r9, 24(%rdi)
          movq    40(%rsi), %r9
          adcq    40(%rsp), %r9
          movq    %r8, 32(%rdi)
          movq    %r9, 40(%rdi)
          # Return the final carry.
          adcq    $0x00, %rax
          addq    $48, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_384_cond_add_6,.-sp_384_cond_add_6
  #endif /* __APPLE__ */
  /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
   *
   * r  Result of division by 2.
   * a  Number to divide.
   * m  Modulus (prime).
   *
   * m is added to a only when a is odd (selected with a mask, not a branch);
   * the 385-bit sum is then shifted right by one bit.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_div2_6
  .type sp_384_div2_6,@function
  .align 16
  sp_384_div2_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_div2_6
  .p2align 4
  _sp_384_div2_6:
  #endif /* __APPLE__ */
          subq    $48, %rsp
          # %r11 = all ones when a[0] is odd, zero when it is even.
          movq    (%rsi), %rax
          movl    %eax, %r11d
          andl    $0x01, %r11d
          negq    %r11
          # %r10 receives the carry out of the addition below.
          xorl    %r10d, %r10d
          # Stage (m & %r11) in the 48-byte stack buffer.
          movq    %r11, %r8
          andq    (%rdx), %r8
          movq    %r8, (%rsp)
          movq    %r11, %r8
          andq    8(%rdx), %r8
          movq    %r8, 8(%rsp)
          movq    %r11, %r8
          andq    16(%rdx), %r8
          movq    %r8, 16(%rsp)
          movq    %r11, %r8
          andq    24(%rdx), %r8
          movq    %r8, 24(%rsp)
          movq    %r11, %r8
          andq    32(%rdx), %r8
          movq    %r8, 32(%rsp)
          movq    %r11, %r8
          andq    40(%rdx), %r8
          movq    %r8, 40(%rsp)
          # Add a into the staged value; %rax still holds a[0].
          addq    %rax, (%rsp)
          movq    8(%rsi), %rax
          adcq    %rax, 8(%rsp)
          movq    16(%rsi), %rax
          adcq    %rax, 16(%rsp)
          movq    24(%rsi), %rax
          adcq    %rax, 24(%rsp)
          movq    32(%rsi), %rax
          adcq    %rax, 32(%rsp)
          movq    40(%rsi), %rax
          adcq    %rax, 40(%rsp)
          adcq    $0x00, %r10
          # Shift the sum right by one bit into r. Each shrdq pulls the low
          # bit of the next higher word in; the top word takes the carry.
          movq    (%rsp), %rax
          movq    8(%rsp), %rcx
          shrdq   $0x01, %rcx, %rax
          movq    %rax, (%rdi)
          movq    16(%rsp), %rax
          shrdq   $0x01, %rax, %rcx
          movq    %rcx, 8(%rdi)
          movq    24(%rsp), %rcx
          shrdq   $0x01, %rcx, %rax
          movq    %rax, 16(%rdi)
          movq    32(%rsp), %rax
          shrdq   $0x01, %rax, %rcx
          movq    %rcx, 24(%rdi)
          movq    40(%rsp), %rcx
          shrdq   $0x01, %rcx, %rax
          movq    %rax, 32(%rdi)
          shrdq   $0x01, %r10, %rcx
          movq    %rcx, 40(%rdi)
          addq    $48, %rsp
          repz retq
  #ifndef __APPLE__
  .size sp_384_div2_6,.-sp_384_div2_6
  #endif /* __APPLE__ */
  43002. #ifndef WC_NO_CACHE_RESISTANT
  /* Touch each possible point that could be being copied.
   *
   * r      Point to copy into.
   * table  Table - start of the entries to access
   * idx    Index of point to retrieve.
   *
   * Every one of entries 1..32 (0x128 bytes apart) is read on each call and
   * the entry whose number equals idx is selected with a compare mask, so
   * the memory access pattern does not depend on idx. Entry 0 is skipped;
   * idx 0 therefore produces all-zero output. Bytes 0..47, 96..143 and
   * 192..239 of the selected entry are copied to the same offsets of r
   * (presumably the three ordinates of the point - confirm against the
   * point structure).
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_get_point_33_6
  .type sp_384_get_point_33_6,@function
  .align 16
  sp_384_get_point_33_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_get_point_33_6
  .p2align 4
  _sp_384_get_point_33_6:
  #endif /* __APPLE__ */
          # Pass 1: bytes 0..47 and 96..143 of each entry.
          # %xmm15 = step of 1 per lane, %xmm13 = idx per lane,
          # %xmm14 = number of the entry being visited (starts at 1).
          movl    $0x01, %eax
          movd    %edx, %xmm13
          movd    %eax, %xmm15
          addq    $0x128, %rsi
          movl    $32, %eax
          pshufd  $0x00, %xmm13, %xmm13
          pshufd  $0x00, %xmm15, %xmm15
          pxor    %xmm14, %xmm14
          pxor    %xmm0, %xmm0
          pxor    %xmm1, %xmm1
          pxor    %xmm2, %xmm2
          pxor    %xmm3, %xmm3
          pxor    %xmm4, %xmm4
          pxor    %xmm5, %xmm5
          movdqa  %xmm15, %xmm14
  L_384_get_point_33_6_start:
          # %xmm12 = all ones when this entry number equals idx, zero otherwise.
          movdqa  %xmm14, %xmm12
          paddd   %xmm15, %xmm14
          pcmpeqd %xmm13, %xmm12
          movdqu  (%rsi), %xmm6
          movdqu  16(%rsi), %xmm7
          movdqu  32(%rsi), %xmm8
          movdqu  96(%rsi), %xmm9
          movdqu  112(%rsi), %xmm10
          movdqu  128(%rsi), %xmm11
          addq    $0x128, %rsi
          pand    %xmm12, %xmm6
          pand    %xmm12, %xmm7
          pand    %xmm12, %xmm8
          pand    %xmm12, %xmm9
          pand    %xmm12, %xmm10
          pand    %xmm12, %xmm11
          por     %xmm6, %xmm0
          por     %xmm7, %xmm1
          por     %xmm8, %xmm2
          por     %xmm9, %xmm3
          por     %xmm10, %xmm4
          por     %xmm11, %xmm5
          decq    %rax
          jnz     L_384_get_point_33_6_start
          movdqu  %xmm0, (%rdi)
          movdqu  %xmm1, 16(%rdi)
          movdqu  %xmm2, 32(%rdi)
          movdqu  %xmm3, 96(%rdi)
          movdqu  %xmm4, 112(%rdi)
          movdqu  %xmm5, 128(%rdi)
          # Pass 2: bytes 192..239 of each entry. Rewind by 32 entries
          # (32 * 0x128 = 0x2500) so the scan restarts at entry 1.
          movl    $0x01, %eax
          movd    %edx, %xmm13
          movd    %eax, %xmm15
          subq    $0x2500, %rsi
          movl    $32, %eax
          pshufd  $0x00, %xmm13, %xmm13
          pshufd  $0x00, %xmm15, %xmm15
          pxor    %xmm14, %xmm14
          pxor    %xmm0, %xmm0
          pxor    %xmm1, %xmm1
          pxor    %xmm2, %xmm2
          movdqa  %xmm15, %xmm14
  L_384_get_point_33_6_start_2:
          movdqa  %xmm14, %xmm12
          paddd   %xmm15, %xmm14
          pcmpeqd %xmm13, %xmm12
          movdqu  192(%rsi), %xmm6
          movdqu  208(%rsi), %xmm7
          movdqu  224(%rsi), %xmm8
          addq    $0x128, %rsi
          pand    %xmm12, %xmm6
          pand    %xmm12, %xmm7
          pand    %xmm12, %xmm8
          por     %xmm6, %xmm0
          por     %xmm7, %xmm1
          por     %xmm8, %xmm2
          decq    %rax
          jnz     L_384_get_point_33_6_start_2
          movdqu  %xmm0, 192(%rdi)
          movdqu  %xmm1, 208(%rdi)
          movdqu  %xmm2, 224(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_get_point_33_6,.-sp_384_get_point_33_6
  #endif /* __APPLE__ */
  /* Touch each possible point that could be being copied.
   *
   * r      Point to copy into.
   * table  Table - start of the entries to access
   * idx    Index of point to retrieve.
   *
   * Every one of entries 1..32 (0x128 bytes apart) is read on each call and
   * the entry whose number equals idx is selected with a compare mask, so
   * the memory access pattern does not depend on idx. Entry 0 is skipped;
   * idx 0 therefore produces all-zero output. Bytes 0..47, 96..143 and
   * 192..239 of the selected entry are copied to the same offsets of r.
   *
   * Only VEX-encoded instructions are used, and vzeroupper is issued before
   * returning so that SSE code in the caller does not run with dirty upper
   * YMM halves.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_get_point_33_avx2_6
  .type sp_384_get_point_33_avx2_6,@function
  .align 16
  sp_384_get_point_33_avx2_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_get_point_33_avx2_6
  .p2align 4
  _sp_384_get_point_33_avx2_6:
  #endif /* __APPLE__ */
          movl    $0x01, %eax
          vmovd   %edx, %xmm13
          addq    $0x128, %rsi
          vmovd   %eax, %xmm15
          movl    $32, %eax
          # vpermd with an all-zero index vector broadcasts dword 0:
          # %ymm13 = idx in every lane, %ymm15 = 1 in every lane.
          vpxor   %ymm14, %ymm14, %ymm14
          vpermd  %ymm13, %ymm14, %ymm13
          vpermd  %ymm15, %ymm14, %ymm15
          # Accumulators: a 32-byte plus a 16-byte register per 48-byte field.
          vpxor   %ymm0, %ymm0, %ymm0
          vpxor   %xmm1, %xmm1, %xmm1
          vpxor   %ymm2, %ymm2, %ymm2
          vpxor   %xmm3, %xmm3, %xmm3
          vpxor   %ymm4, %ymm4, %ymm4
          vpxor   %xmm5, %xmm5, %xmm5
          # %ymm14 = number of the entry being visited (starts at 1).
          vmovdqa %ymm15, %ymm14
  L_384_get_point_33_avx2_6_start:
          # %ymm12 = all ones when this entry number equals idx, zero otherwise.
          vpcmpeqd %ymm13, %ymm14, %ymm12
          vpaddd  %ymm15, %ymm14, %ymm14
          vmovdqu (%rsi), %ymm6
          vmovdqu 32(%rsi), %xmm7
          vmovdqu 96(%rsi), %ymm8
          vmovdqu 128(%rsi), %xmm9
          vmovdqu 192(%rsi), %ymm10
          vmovdqu 224(%rsi), %xmm11
          addq    $0x128, %rsi
          vpand   %ymm12, %ymm6, %ymm6
          vpand   %xmm12, %xmm7, %xmm7
          vpand   %ymm12, %ymm8, %ymm8
          vpand   %xmm12, %xmm9, %xmm9
          vpand   %ymm12, %ymm10, %ymm10
          vpand   %xmm12, %xmm11, %xmm11
          vpor    %ymm6, %ymm0, %ymm0
          vpor    %xmm7, %xmm1, %xmm1
          vpor    %ymm8, %ymm2, %ymm2
          vpor    %xmm9, %xmm3, %xmm3
          vpor    %ymm10, %ymm4, %ymm4
          vpor    %xmm11, %xmm5, %xmm5
          decq    %rax
          jnz     L_384_get_point_33_avx2_6_start
          vmovdqu %ymm0, (%rdi)
          vmovdqu %xmm1, 32(%rdi)
          vmovdqu %ymm2, 96(%rdi)
          vmovdqu %xmm3, 128(%rdi)
          vmovdqu %ymm4, 192(%rdi)
          vmovdqu %xmm5, 224(%rdi)
          # Clear the upper YMM halves before handing control back.
          vzeroupper
          repz retq
  #ifndef __APPLE__
  .size sp_384_get_point_33_avx2_6,.-sp_384_get_point_33_avx2_6
  #endif /* __APPLE__ */
  43169. #endif /* !WC_NO_CACHE_RESISTANT */
  43170. /* Multiply a and b into r. (r = a * b)
  43171. *
  43172. * r Result of multiplication.
  43173. * a First number to multiply.
  43174. * b Second number to multiply.
  *
  * Register use, as read from the code below:
  *   rdi  r; twelve 64-bit words are stored at offsets 0..88.
  *   rsi  a; six 64-bit words are loaded from offsets 0..40.
  *   rdx  b on entry; copied to rax because mulx takes rdx as its implicit
  *        multiplicand. Six 64-bit words are loaded from 0..40(%rax).
  *   rbx  held at zero and added with adcx/adox to fold pending carries.
  * r12-r15 and rbx are pushed on entry and popped before returning.
  * Uses mulx (BMI2) and adcx/adox (ADX).
  * Every load from a and b precedes the first store to r; presumably this
  * is what lets r overlap an input - TODO confirm against callers.
  43175. */
  43176. #ifndef __APPLE__
  43177. .text
  43178. .globl sp_384_mul_avx2_6
  43179. .type sp_384_mul_avx2_6,@function
  43180. .align 16
  43181. sp_384_mul_avx2_6:
  43182. #else
  43183. .section __TEXT,__text
  43184. .globl _sp_384_mul_avx2_6
  43185. .p2align 4
  43186. _sp_384_mul_avx2_6:
  43187. #endif /* __APPLE__ */
  43188. pushq %r12
  43189. pushq %r13
  43190. pushq %r14
  43191. pushq %r15
  43192. pushq %rbx
  # mulx multiplies by rdx implicitly, so b is addressed through rax from here on.
  43193. movq %rdx, %rax
  # Five stack words hold r[0..4] until the final copy into r.
  43194. subq $40, %rsp
  # rbx = 0 for carry folding; the xor also clears both CF and OF.
  43195. xorq %rbx, %rbx
  43196. movq (%rsi), %rdx
  # Row 0 has nothing to accumulate onto yet, so only the CF chain (adcx) is used.
  43197. # A[0] * B[0]
  43198. mulxq (%rax), %r9, %r10
  43199. # A[0] * B[1]
  43200. mulxq 8(%rax), %rcx, %r11
  43201. adcxq %rcx, %r10
  43202. # A[0] * B[2]
  43203. mulxq 16(%rax), %rcx, %r12
  43204. adcxq %rcx, %r11
  43205. # A[0] * B[3]
  43206. mulxq 24(%rax), %rcx, %r13
  43207. adcxq %rcx, %r12
  43208. # A[0] * B[4]
  43209. mulxq 32(%rax), %rcx, %r14
  43210. adcxq %rcx, %r13
  43211. # A[0] * B[5]
  43212. mulxq 40(%rax), %rcx, %r15
  43213. adcxq %rcx, %r14
  43214. adcxq %rbx, %r15
  # r[0] is complete: park it on the stack and recycle r9 as the new top word.
  43215. movq %r9, (%rsp)
  # mov rather than xor here: the pending CF has to survive until the adcx below.
  43216. movq $0x00, %r9
  43217. adcxq %rbx, %r9
  # Rows 1-4: low product halves ride the CF chain (adcx), high halves the OF chain (adox).
  43218. xorq %rbx, %rbx
  43219. movq 8(%rsi), %rdx
  43220. # A[1] * B[0]
  43221. mulxq (%rax), %rcx, %r8
  43222. adcxq %rcx, %r10
  43223. adoxq %r8, %r11
  43224. # A[1] * B[1]
  43225. mulxq 8(%rax), %rcx, %r8
  43226. adcxq %rcx, %r11
  43227. adoxq %r8, %r12
  43228. # A[1] * B[2]
  43229. mulxq 16(%rax), %rcx, %r8
  43230. adcxq %rcx, %r12
  43231. adoxq %r8, %r13
  43232. # A[1] * B[3]
  43233. mulxq 24(%rax), %rcx, %r8
  43234. adcxq %rcx, %r13
  43235. adoxq %r8, %r14
  43236. # A[1] * B[4]
  43237. mulxq 32(%rax), %rcx, %r8
  43238. adcxq %rcx, %r14
  43239. adoxq %r8, %r15
  43240. # A[1] * B[5]
  43241. mulxq 40(%rax), %rcx, %r8
  43242. adcxq %rcx, %r15
  43243. adoxq %r8, %r9
  43244. adcxq %rbx, %r9
  # r[1] is complete; r10 becomes the next top word and collects both leftover carries.
  43245. movq %r10, 8(%rsp)
  43246. movq $0x00, %r10
  43247. adcxq %rbx, %r10
  43248. adoxq %rbx, %r10
  43249. xorq %rbx, %rbx
  43250. movq 16(%rsi), %rdx
  43251. # A[2] * B[0]
  43252. mulxq (%rax), %rcx, %r8
  43253. adcxq %rcx, %r11
  43254. adoxq %r8, %r12
  43255. # A[2] * B[1]
  43256. mulxq 8(%rax), %rcx, %r8
  43257. adcxq %rcx, %r12
  43258. adoxq %r8, %r13
  43259. # A[2] * B[2]
  43260. mulxq 16(%rax), %rcx, %r8
  43261. adcxq %rcx, %r13
  43262. adoxq %r8, %r14
  43263. # A[2] * B[3]
  43264. mulxq 24(%rax), %rcx, %r8
  43265. adcxq %rcx, %r14
  43266. adoxq %r8, %r15
  43267. # A[2] * B[4]
  43268. mulxq 32(%rax), %rcx, %r8
  43269. adcxq %rcx, %r15
  43270. adoxq %r8, %r9
  43271. # A[2] * B[5]
  43272. mulxq 40(%rax), %rcx, %r8
  43273. adcxq %rcx, %r9
  43274. adoxq %r8, %r10
  43275. adcxq %rbx, %r10
  # r[2] is complete; r11 becomes the next top word.
  43276. movq %r11, 16(%rsp)
  43277. movq $0x00, %r11
  43278. adcxq %rbx, %r11
  43279. adoxq %rbx, %r11
  43280. xorq %rbx, %rbx
  43281. movq 24(%rsi), %rdx
  43282. # A[3] * B[0]
  43283. mulxq (%rax), %rcx, %r8
  43284. adcxq %rcx, %r12
  43285. adoxq %r8, %r13
  43286. # A[3] * B[1]
  43287. mulxq 8(%rax), %rcx, %r8
  43288. adcxq %rcx, %r13
  43289. adoxq %r8, %r14
  43290. # A[3] * B[2]
  43291. mulxq 16(%rax), %rcx, %r8
  43292. adcxq %rcx, %r14
  43293. adoxq %r8, %r15
  43294. # A[3] * B[3]
  43295. mulxq 24(%rax), %rcx, %r8
  43296. adcxq %rcx, %r15
  43297. adoxq %r8, %r9
  43298. # A[3] * B[4]
  43299. mulxq 32(%rax), %rcx, %r8
  43300. adcxq %rcx, %r9
  43301. adoxq %r8, %r10
  43302. # A[3] * B[5]
  43303. mulxq 40(%rax), %rcx, %r8
  43304. adcxq %rcx, %r10
  43305. adoxq %r8, %r11
  43306. adcxq %rbx, %r11
  # r[3] is complete; r12 becomes the next top word.
  43307. movq %r12, 24(%rsp)
  43308. movq $0x00, %r12
  43309. adcxq %rbx, %r12
  43310. adoxq %rbx, %r12
  43311. xorq %rbx, %rbx
  43312. movq 32(%rsi), %rdx
  43313. # A[4] * B[0]
  43314. mulxq (%rax), %rcx, %r8
  43315. adcxq %rcx, %r13
  43316. adoxq %r8, %r14
  43317. # A[4] * B[1]
  43318. mulxq 8(%rax), %rcx, %r8
  43319. adcxq %rcx, %r14
  43320. adoxq %r8, %r15
  43321. # A[4] * B[2]
  43322. mulxq 16(%rax), %rcx, %r8
  43323. adcxq %rcx, %r15
  43324. adoxq %r8, %r9
  43325. # A[4] * B[3]
  43326. mulxq 24(%rax), %rcx, %r8
  43327. adcxq %rcx, %r9
  43328. adoxq %r8, %r10
  43329. # A[4] * B[4]
  43330. mulxq 32(%rax), %rcx, %r8
  43331. adcxq %rcx, %r10
  43332. adoxq %r8, %r11
  43333. # A[4] * B[5]
  43334. mulxq 40(%rax), %rcx, %r8
  43335. adcxq %rcx, %r11
  43336. adoxq %r8, %r12
  43337. adcxq %rbx, %r12
  # r[4] is complete. r13 is reused below as the top word of the last row.
  43338. movq %r13, 32(%rsp)
  # NOTE(review): unlike rows 1-4 there is no xorq before this row and no fresh
  # top-word register. Presumably CF and OF are both known to be clear here,
  # since the sum of rows 0-4 fits in the eleven words accumulated so far - confirm.
  43339. movq 40(%rsi), %rdx
  43340. # A[5] * B[0]
  43341. mulxq (%rax), %rcx, %r8
  43342. adcxq %rcx, %r14
  43343. adoxq %r8, %r15
  43344. # A[5] * B[1]
  43345. mulxq 8(%rax), %rcx, %r8
  43346. adcxq %rcx, %r15
  43347. adoxq %r8, %r9
  43348. # A[5] * B[2]
  43349. mulxq 16(%rax), %rcx, %r8
  43350. adcxq %rcx, %r9
  43351. adoxq %r8, %r10
  43352. # A[5] * B[3]
  43353. mulxq 24(%rax), %rcx, %r8
  43354. adcxq %rcx, %r10
  43355. adoxq %r8, %r11
  43356. # A[5] * B[4]
  43357. mulxq 32(%rax), %rcx, %r8
  43358. adcxq %rcx, %r11
  43359. adoxq %r8, %r12
  43360. # A[5] * B[5]
  # The high half lands directly in r13, which then absorbs both chains.
  43361. mulxq 40(%rax), %rcx, %r13
  43362. adcxq %rcx, %r12
  43363. adoxq %rbx, %r13
  43364. adcxq %rbx, %r13
  # r[5..11] are still in registers: store them first.
  43365. movq %r14, 40(%rdi)
  43366. movq %r15, 48(%rdi)
  43367. movq %r9, 56(%rdi)
  43368. movq %r10, 64(%rdi)
  43369. movq %r11, 72(%rdi)
  43370. movq %r12, 80(%rdi)
  43371. movq %r13, 88(%rdi)
  # Then copy r[0..4] from the stack staging area into r.
  43372. movq (%rsp), %r9
  43373. movq 8(%rsp), %r10
  43374. movq 16(%rsp), %r11
  43375. movq 24(%rsp), %r12
  43376. movq 32(%rsp), %r13
  43377. movq %r9, (%rdi)
  43378. movq %r10, 8(%rdi)
  43379. movq %r11, 16(%rdi)
  43380. movq %r12, 24(%rdi)
  43381. movq %r13, 32(%rdi)
  # Release the staging area and restore the saved registers in reverse push order.
  43382. addq $40, %rsp
  43383. popq %rbx
  43384. popq %r15
  43385. popq %r14
  43386. popq %r13
  43387. popq %r12
  43388. repz retq
  43389. #ifndef __APPLE__
  43390. .size sp_384_mul_avx2_6,.-sp_384_mul_avx2_6
  43391. #endif /* __APPLE__ */
  43392. #ifdef HAVE_INTEL_AVX2
  43393. /* Reduce the number back to 384 bits using Montgomery reduction.
  43394. *
  43395. * a A single precision number to reduce in place.
  43396. * m The single precision number representing the modulus.
  43397. * mp The digit representing the negative inverse of m mod 2^n.
  *
  * Register use, as read from the code below:
  *   rdi  a; words at offsets 0..88 are read, offsets 16..88 are rewritten by
  *        the six steps, and the final result is stored at offsets 0..40.
  *   rsi  m; six words at offsets 0..40.
  *   rdx  mp on entry; moved to rax because mulx uses rdx as its implicit
  *        multiplicand (rdx holds mu inside each step).
  *   r12  a[i], the word being cancelled in the current step.
  *   r13  carry word handed from one step to the next (zero on entry).
  *   r11  zeroed at the top of every step; that xor also clears CF and OF.
  * r12 and r13 are pushed on entry and popped before returning.
  * Uses mulx and pext (BMI2) and adcx/adox (ADX).
  * The six steps (i = 0..5) are fully unrolled.
  43398. */
  43399. #ifndef __APPLE__
  43400. .text
  43401. .globl sp_384_mont_reduce_order_avx2_6
  43402. .type sp_384_mont_reduce_order_avx2_6,@function
  43403. .align 16
  43404. sp_384_mont_reduce_order_avx2_6:
  43405. #else
  43406. .section __TEXT,__text
  43407. .globl _sp_384_mont_reduce_order_avx2_6
  43408. .p2align 4
  43409. _sp_384_mont_reduce_order_avx2_6:
  43410. #endif /* __APPLE__ */
  43411. pushq %r12
  43412. pushq %r13
  43413. movq %rdx, %rax
  43414. xorq %r13, %r13
  43415. movq (%rdi), %r12
  43416. xorq %r11, %r11
  # No instruction in this function branches to the label below.
  43417. L_mont_loop_order_avx2_6:
  # Step i = 0
  43418. # mu = a[i] * mp
  43419. movq %r12, %rdx
  43420. movq %r12, %r9
  43421. imulq %rax, %rdx
  43422. xorq %r11, %r11
  43423. # a[i+0] += m[0] * mu
  43424. mulxq (%rsi), %rcx, %r8
  43425. movq 8(%rdi), %r12
  # The low-word sum in r9 is never stored (r9 is reloaded below); only its carry is kept.
  43426. adcxq %rcx, %r9
  43427. adoxq %r8, %r12
  43428. # a[i+1] += m[1] * mu
  43429. mulxq 8(%rsi), %rcx, %r8
  43430. movq 16(%rdi), %r9
  # a[i+1] stays in r12 and is not written back: it is the a[i] of the next step.
  43431. adcxq %rcx, %r12
  43432. adoxq %r8, %r9
  43433. # a[i+2] += m[2] * mu
  43434. mulxq 16(%rsi), %rcx, %r8
  43435. movq 24(%rdi), %r10
  43436. adcxq %rcx, %r9
  43437. adoxq %r8, %r10
  43438. movq %r9, 16(%rdi)
  43439. # a[i+3] += m[3] * mu
  43440. mulxq 24(%rsi), %rcx, %r8
  43441. movq 32(%rdi), %r9
  43442. adcxq %rcx, %r10
  43443. adoxq %r8, %r9
  43444. movq %r10, 24(%rdi)
  43445. # a[i+4] += m[4] * mu
  43446. mulxq 32(%rsi), %rcx, %r8
  43447. movq 40(%rdi), %r10
  43448. adcxq %rcx, %r9
  43449. adoxq %r8, %r10
  43450. movq %r9, 32(%rdi)
  43451. # a[i+5] += m[5] * mu
  43452. mulxq 40(%rsi), %rcx, %r8
  43453. movq 48(%rdi), %r9
  43454. adcxq %rcx, %r10
  43455. adoxq %r8, %r9
  43456. movq %r10, 40(%rdi)
  # a[i+6] also receives the carry word left by the previous step.
  43457. adcxq %r13, %r9
  43458. movq %r9, 48(%rdi)
  # r13 = 0 + OF + CF: collect what is left on both chains for the next step.
  43459. movq %r11, %r13
  43460. adoxq %r11, %r13
  43461. adcxq %r11, %r13
  # Step i = 1
  43462. # mu = a[i] * mp
  43463. movq %r12, %rdx
  43464. movq %r12, %r9
  43465. imulq %rax, %rdx
  43466. xorq %r11, %r11
  43467. # a[i+0] += m[0] * mu
  43468. mulxq (%rsi), %rcx, %r8
  43469. movq 16(%rdi), %r12
  43470. adcxq %rcx, %r9
  43471. adoxq %r8, %r12
  43472. # a[i+1] += m[1] * mu
  43473. mulxq 8(%rsi), %rcx, %r8
  43474. movq 24(%rdi), %r9
  43475. adcxq %rcx, %r12
  43476. adoxq %r8, %r9
  43477. # a[i+2] += m[2] * mu
  43478. mulxq 16(%rsi), %rcx, %r8
  43479. movq 32(%rdi), %r10
  43480. adcxq %rcx, %r9
  43481. adoxq %r8, %r10
  43482. movq %r9, 24(%rdi)
  43483. # a[i+3] += m[3] * mu
  43484. mulxq 24(%rsi), %rcx, %r8
  43485. movq 40(%rdi), %r9
  43486. adcxq %rcx, %r10
  43487. adoxq %r8, %r9
  43488. movq %r10, 32(%rdi)
  43489. # a[i+4] += m[4] * mu
  43490. mulxq 32(%rsi), %rcx, %r8
  43491. movq 48(%rdi), %r10
  43492. adcxq %rcx, %r9
  43493. adoxq %r8, %r10
  43494. movq %r9, 40(%rdi)
  43495. # a[i+5] += m[5] * mu
  43496. mulxq 40(%rsi), %rcx, %r8
  43497. movq 56(%rdi), %r9
  43498. adcxq %rcx, %r10
  43499. adoxq %r8, %r9
  43500. movq %r10, 48(%rdi)
  43501. adcxq %r13, %r9
  43502. movq %r9, 56(%rdi)
  43503. movq %r11, %r13
  43504. adoxq %r11, %r13
  43505. adcxq %r11, %r13
  # Step i = 2
  43506. # mu = a[i] * mp
  43507. movq %r12, %rdx
  43508. movq %r12, %r9
  43509. imulq %rax, %rdx
  43510. xorq %r11, %r11
  43511. # a[i+0] += m[0] * mu
  43512. mulxq (%rsi), %rcx, %r8
  43513. movq 24(%rdi), %r12
  43514. adcxq %rcx, %r9
  43515. adoxq %r8, %r12
  43516. # a[i+1] += m[1] * mu
  43517. mulxq 8(%rsi), %rcx, %r8
  43518. movq 32(%rdi), %r9
  43519. adcxq %rcx, %r12
  43520. adoxq %r8, %r9
  43521. # a[i+2] += m[2] * mu
  43522. mulxq 16(%rsi), %rcx, %r8
  43523. movq 40(%rdi), %r10
  43524. adcxq %rcx, %r9
  43525. adoxq %r8, %r10
  43526. movq %r9, 32(%rdi)
  43527. # a[i+3] += m[3] * mu
  43528. mulxq 24(%rsi), %rcx, %r8
  43529. movq 48(%rdi), %r9
  43530. adcxq %rcx, %r10
  43531. adoxq %r8, %r9
  43532. movq %r10, 40(%rdi)
  43533. # a[i+4] += m[4] * mu
  43534. mulxq 32(%rsi), %rcx, %r8
  43535. movq 56(%rdi), %r10
  43536. adcxq %rcx, %r9
  43537. adoxq %r8, %r10
  43538. movq %r9, 48(%rdi)
  43539. # a[i+5] += m[5] * mu
  43540. mulxq 40(%rsi), %rcx, %r8
  43541. movq 64(%rdi), %r9
  43542. adcxq %rcx, %r10
  43543. adoxq %r8, %r9
  43544. movq %r10, 56(%rdi)
  43545. adcxq %r13, %r9
  43546. movq %r9, 64(%rdi)
  43547. movq %r11, %r13
  43548. adoxq %r11, %r13
  43549. adcxq %r11, %r13
  # Step i = 3
  43550. # mu = a[i] * mp
  43551. movq %r12, %rdx
  43552. movq %r12, %r9
  43553. imulq %rax, %rdx
  43554. xorq %r11, %r11
  43555. # a[i+0] += m[0] * mu
  43556. mulxq (%rsi), %rcx, %r8
  43557. movq 32(%rdi), %r12
  43558. adcxq %rcx, %r9
  43559. adoxq %r8, %r12
  43560. # a[i+1] += m[1] * mu
  43561. mulxq 8(%rsi), %rcx, %r8
  43562. movq 40(%rdi), %r9
  43563. adcxq %rcx, %r12
  43564. adoxq %r8, %r9
  43565. # a[i+2] += m[2] * mu
  43566. mulxq 16(%rsi), %rcx, %r8
  43567. movq 48(%rdi), %r10
  43568. adcxq %rcx, %r9
  43569. adoxq %r8, %r10
  43570. movq %r9, 40(%rdi)
  43571. # a[i+3] += m[3] * mu
  43572. mulxq 24(%rsi), %rcx, %r8
  43573. movq 56(%rdi), %r9
  43574. adcxq %rcx, %r10
  43575. adoxq %r8, %r9
  43576. movq %r10, 48(%rdi)
  43577. # a[i+4] += m[4] * mu
  43578. mulxq 32(%rsi), %rcx, %r8
  43579. movq 64(%rdi), %r10
  43580. adcxq %rcx, %r9
  43581. adoxq %r8, %r10
  43582. movq %r9, 56(%rdi)
  43583. # a[i+5] += m[5] * mu
  43584. mulxq 40(%rsi), %rcx, %r8
  43585. movq 72(%rdi), %r9
  43586. adcxq %rcx, %r10
  43587. adoxq %r8, %r9
  43588. movq %r10, 64(%rdi)
  43589. adcxq %r13, %r9
  43590. movq %r9, 72(%rdi)
  43591. movq %r11, %r13
  43592. adoxq %r11, %r13
  43593. adcxq %r11, %r13
  # Step i = 4
  43594. # mu = a[i] * mp
  43595. movq %r12, %rdx
  43596. movq %r12, %r9
  43597. imulq %rax, %rdx
  43598. xorq %r11, %r11
  43599. # a[i+0] += m[0] * mu
  43600. mulxq (%rsi), %rcx, %r8
  43601. movq 40(%rdi), %r12
  43602. adcxq %rcx, %r9
  43603. adoxq %r8, %r12
  43604. # a[i+1] += m[1] * mu
  43605. mulxq 8(%rsi), %rcx, %r8
  43606. movq 48(%rdi), %r9
  43607. adcxq %rcx, %r12
  43608. adoxq %r8, %r9
  43609. # a[i+2] += m[2] * mu
  43610. mulxq 16(%rsi), %rcx, %r8
  43611. movq 56(%rdi), %r10
  43612. adcxq %rcx, %r9
  43613. adoxq %r8, %r10
  43614. movq %r9, 48(%rdi)
  43615. # a[i+3] += m[3] * mu
  43616. mulxq 24(%rsi), %rcx, %r8
  43617. movq 64(%rdi), %r9
  43618. adcxq %rcx, %r10
  43619. adoxq %r8, %r9
  43620. movq %r10, 56(%rdi)
  43621. # a[i+4] += m[4] * mu
  43622. mulxq 32(%rsi), %rcx, %r8
  43623. movq 72(%rdi), %r10
  43624. adcxq %rcx, %r9
  43625. adoxq %r8, %r10
  43626. movq %r9, 64(%rdi)
  43627. # a[i+5] += m[5] * mu
  43628. mulxq 40(%rsi), %rcx, %r8
  43629. movq 80(%rdi), %r9
  43630. adcxq %rcx, %r10
  43631. adoxq %r8, %r9
  43632. movq %r10, 72(%rdi)
  43633. adcxq %r13, %r9
  43634. movq %r9, 80(%rdi)
  43635. movq %r11, %r13
  43636. adoxq %r11, %r13
  43637. adcxq %r11, %r13
  # Step i = 5
  43638. # mu = a[i] * mp
  43639. movq %r12, %rdx
  43640. movq %r12, %r9
  43641. imulq %rax, %rdx
  43642. xorq %r11, %r11
  43643. # a[i+0] += m[0] * mu
  43644. mulxq (%rsi), %rcx, %r8
  43645. movq 48(%rdi), %r12
  43646. adcxq %rcx, %r9
  43647. adoxq %r8, %r12
  43648. # a[i+1] += m[1] * mu
  43649. mulxq 8(%rsi), %rcx, %r8
  43650. movq 56(%rdi), %r9
  43651. adcxq %rcx, %r12
  43652. adoxq %r8, %r9
  43653. # a[i+2] += m[2] * mu
  43654. mulxq 16(%rsi), %rcx, %r8
  43655. movq 64(%rdi), %r10
  43656. adcxq %rcx, %r9
  43657. adoxq %r8, %r10
  43658. movq %r9, 56(%rdi)
  43659. # a[i+3] += m[3] * mu
  43660. mulxq 24(%rsi), %rcx, %r8
  43661. movq 72(%rdi), %r9
  43662. adcxq %rcx, %r10
  43663. adoxq %r8, %r9
  43664. movq %r10, 64(%rdi)
  43665. # a[i+4] += m[4] * mu
  43666. mulxq 32(%rsi), %rcx, %r8
  43667. movq 80(%rdi), %r10
  43668. adcxq %rcx, %r9
  43669. adoxq %r8, %r10
  43670. movq %r9, 72(%rdi)
  43671. # a[i+5] += m[5] * mu
  43672. mulxq 40(%rsi), %rcx, %r8
  43673. movq 88(%rdi), %r9
  43674. adcxq %rcx, %r10
  43675. adoxq %r8, %r9
  43676. movq %r10, 80(%rdi)
  43677. adcxq %r13, %r9
  43678. movq %r9, 88(%rdi)
  43679. movq %r11, %r13
  43680. adoxq %r11, %r13
  43681. adcxq %r11, %r13
  # Final conditional subtract, keyed only on the carry word left in r13.
  # negq turns a carry of 1 into an all-ones mask (0 stays 0); presumably r13
  # is only ever 0 or 1 here - confirm. pext with an all-ones mask returns its
  # source unchanged and with a zero mask returns 0, so each m word is either
  # kept or zeroed before it is subtracted.
  43682. negq %r13
  # rax = a (destination, words 0..5); rdi = a + 48 (source, words 6..11).
  43683. movq %rdi, %rax
  43684. addq $48, %rdi
  43685. movq (%rsi), %r8
  # Word 6 is still live in r12 from the last step, so it is not reloaded.
  43686. movq %r12, %rdx
  43687. pextq %r13, %r8, %r8
  43688. subq %r8, %rdx
  43689. movq 8(%rsi), %r8
  43690. movq 8(%rdi), %rcx
  43691. pextq %r13, %r8, %r8
  43692. movq %rdx, (%rax)
  43693. sbbq %r8, %rcx
  43694. movq 16(%rsi), %rdx
  43695. movq 16(%rdi), %r8
  43696. pextq %r13, %rdx, %rdx
  43697. movq %rcx, 8(%rax)
  43698. sbbq %rdx, %r8
  43699. movq 24(%rsi), %rcx
  43700. movq 24(%rdi), %rdx
  43701. pextq %r13, %rcx, %rcx
  43702. movq %r8, 16(%rax)
  43703. sbbq %rcx, %rdx
  43704. movq 32(%rsi), %r8
  43705. movq 32(%rdi), %rcx
  43706. pextq %r13, %r8, %r8
  43707. movq %rdx, 24(%rax)
  43708. sbbq %r8, %rcx
  43709. movq 40(%rsi), %rdx
  43710. movq 40(%rdi), %r8
  43711. pextq %r13, %rdx, %rdx
  43712. movq %rcx, 32(%rax)
  43713. sbbq %rdx, %r8
  43714. movq %r8, 40(%rax)
  43715. popq %r13
  43716. popq %r12
  43717. repz retq
  43718. #ifndef __APPLE__
  43719. .size sp_384_mont_reduce_order_avx2_6,.-sp_384_mont_reduce_order_avx2_6
  43720. #endif /* __APPLE__ */
  43721. #endif /* HAVE_INTEL_AVX2 */
  43722. /* Square a and put result in r. (r = a * a)
  43723. *
  43724. * r Result of squaring.
  43725. * a Number to square in Montgomery form.
  *
  * Register use, as read from the code below:
  *   rdi  r on entry. The pointer is pushed on the stack and rdi is then
  *        zeroed and used as the zero/carry register until the end.
  *        Twelve 64-bit words are stored at offsets 0..88 of r.
  *   rsi  a; six 64-bit words at offsets 0..40.
  *   r8-r15, rbx, rbp  accumulate the off-diagonal products A[i]*A[j], i != j.
  * The off-diagonal sum is doubled with adox (reg + reg + OF) while the
  * squares A[i]*A[i] are added on the adcx chain.
  * r12-r15, rbx and rbp are pushed on entry and popped before returning.
  * Uses mulx (BMI2) and adcx/adox (ADX).
  43726. */
  43727. #ifndef __APPLE__
  43728. .text
  43729. .globl sp_384_sqr_avx2_6
  43730. .type sp_384_sqr_avx2_6,@function
  43731. .align 16
  43732. sp_384_sqr_avx2_6:
  43733. #else
  43734. .section __TEXT,__text
  43735. .globl _sp_384_sqr_avx2_6
  43736. .p2align 4
  43737. _sp_384_sqr_avx2_6:
  43738. #endif /* __APPLE__ */
  43739. pushq %r12
  43740. pushq %r13
  43741. pushq %r14
  43742. pushq %r15
  43743. pushq %rbx
  43744. pushq %rbp
  # Save r on the stack so rdi can serve as the zero register.
  43745. push %rdi
  # rdi = 0; the xor also clears CF and OF for the chains below.
  43746. xorq %rdi, %rdi
  # Preload A[0] as the mulx multiplicand and A[1], A[2], A[3] for later rows.
  43747. movq (%rsi), %rdx
  43748. movq 8(%rsi), %r15
  43749. movq 16(%rsi), %rbx
  43750. movq 24(%rsi), %rbp
  43751. # Diagonal 0
  43752. # A[1] * A[0]
  43753. mulxq 8(%rsi), %r8, %r9
  43754. # A[2] * A[0]
  43755. mulxq 16(%rsi), %rax, %r10
  43756. adcxq %rax, %r9
  43757. # A[3] * A[0]
  43758. mulxq 24(%rsi), %rax, %r11
  43759. adcxq %rax, %r10
  43760. # A[4] * A[0]
  43761. mulxq 32(%rsi), %rax, %r12
  43762. adcxq %rax, %r11
  43763. # A[5] * A[0]
  43764. mulxq 40(%rsi), %rax, %r13
  43765. adcxq %rax, %r12
  43766. adcxq %rdi, %r13
  43767. # Diagonal 1
  # rdx = A[1] (preloaded into r15 above).
  43768. movq %r15, %rdx
  43769. # A[2] * A[1]
  43770. mulxq 16(%rsi), %rax, %rcx
  43771. adcxq %rax, %r10
  43772. adoxq %rcx, %r11
  43773. # A[3] * A[1]
  43774. mulxq 24(%rsi), %rax, %rcx
  43775. adcxq %rax, %r11
  43776. adoxq %rcx, %r12
  43777. # A[4] * A[1]
  43778. mulxq 32(%rsi), %rax, %rcx
  43779. adcxq %rax, %r12
  43780. adoxq %rcx, %r13
  43781. # A[5] * A[1]
  43782. mulxq 40(%rsi), %rax, %r14
  43783. adcxq %rax, %r13
  43784. adoxq %rdi, %r14
  # rdx = A[2] (preloaded into rbx above).
  43785. movq %rbx, %rdx
  43786. # A[5] * A[2]
  43787. mulxq 40(%rsi), %rax, %r15
  43788. adcxq %rax, %r14
  43789. adoxq %rdi, %r15
  43790. adcxq %rdi, %r15
  # NOTE(review): rbx is not read again before the A[5] * A[3] mulx overwrites
  # it, so this adcx appears to matter only for its effect on CF - confirm.
  43791. adcxq %rdi, %rbx
  43792. # Diagonal 2
  43793. # A[3] * A[2]
  43794. mulxq 24(%rsi), %rax, %rcx
  43795. adcxq %rax, %r12
  43796. adoxq %rcx, %r13
  43797. # A[4] * A[2]
  43798. mulxq 32(%rsi), %rax, %rcx
  43799. adcxq %rax, %r13
  43800. adoxq %rcx, %r14
  # rdx = A[3] (preloaded into rbp above).
  43801. movq %rbp, %rdx
  43802. # A[4] * A[3]
  43803. mulxq 32(%rsi), %rax, %rcx
  43804. adcxq %rax, %r14
  43805. adoxq %rcx, %r15
  43806. # A[5] * A[3]
  43807. mulxq 40(%rsi), %rax, %rbx
  43808. adcxq %rax, %r15
  43809. adoxq %rdi, %rbx
  43810. movq 32(%rsi), %rdx
  43811. # A[5] * A[4]
  43812. mulxq 40(%rsi), %rax, %rbp
  43813. adcxq %rax, %rbx
  43814. adoxq %rdi, %rbp
  43815. adcxq %rdi, %rbp
  # rdi = 0 + 0 + CF: keep the carry out of the off-diagonal sum for the top word.
  43816. adcxq %rdi, %rdi
  43817. # Doubling previous result as we add in square words results
  43818. # A[0] * A[0]
  43819. movq (%rsi), %rdx
  43820. mulxq %rdx, %rax, %rcx
  # Fetch r from the stack just long enough to store r[0], then push it back.
  # pop, push and mov leave the arithmetic flags untouched.
  43821. pop %rdx
  43822. movq %rax, (%rdx)
  43823. adoxq %r8, %r8
  43824. push %rdx
  43825. adcxq %rcx, %r8
  43826. # A[1] * A[1]
  43827. movq 8(%rsi), %rdx
  43828. mulxq %rdx, %rax, %rcx
  43829. adoxq %r9, %r9
  43830. adcxq %rax, %r9
  43831. adoxq %r10, %r10
  43832. adcxq %rcx, %r10
  43833. # A[2] * A[2]
  43834. movq 16(%rsi), %rdx
  43835. mulxq %rdx, %rax, %rcx
  43836. adoxq %r11, %r11
  43837. adcxq %rax, %r11
  43838. adoxq %r12, %r12
  43839. adcxq %rcx, %r12
  43840. # A[3] * A[3]
  43841. movq 24(%rsi), %rdx
  43842. mulxq %rdx, %rax, %rcx
  43843. adoxq %r13, %r13
  43844. adcxq %rax, %r13
  43845. adoxq %r14, %r14
  43846. adcxq %rcx, %r14
  43847. # A[4] * A[4]
  43848. movq 32(%rsi), %rdx
  43849. mulxq %rdx, %rax, %rcx
  43850. adoxq %r15, %r15
  43851. adcxq %rax, %r15
  43852. adoxq %rbx, %rbx
  43853. adcxq %rcx, %rbx
  43854. # A[5] * A[5]
  43855. movq 40(%rsi), %rdx
  43856. mulxq %rdx, %rax, %rcx
  43857. adoxq %rbp, %rbp
  43858. adcxq %rax, %rbp
  # Top word: high half of A[5]*A[5] plus the saved carry word in rdi plus both chains.
  43859. adcxq %rdi, %rcx
  # mov rather than xor: the pending OF must survive until the adox below.
  43860. movq $0x00, %rax
  43861. adoxq %rax, %rcx
  # Recover r and store r[1..11]; r[0] was stored earlier.
  43862. pop %rdi
  43863. movq %r8, 8(%rdi)
  43864. movq %r9, 16(%rdi)
  43865. movq %r10, 24(%rdi)
  43866. movq %r11, 32(%rdi)
  43867. movq %r12, 40(%rdi)
  43868. movq %r13, 48(%rdi)
  43869. movq %r14, 56(%rdi)
  43870. movq %r15, 64(%rdi)
  43871. movq %rbx, 72(%rdi)
  43872. movq %rbp, 80(%rdi)
  43873. movq %rcx, 88(%rdi)
  43874. popq %rbp
  43875. popq %rbx
  43876. popq %r15
  43877. popq %r14
  43878. popq %r13
  43879. popq %r12
  43880. repz retq
  43881. #ifndef __APPLE__
  43882. .size sp_384_sqr_avx2_6,.-sp_384_sqr_avx2_6
  43883. #endif /* __APPLE__ */
  43884. #ifndef WC_NO_CACHE_RESISTANT
  /* Copy one entry of a 256-entry table into a point, touching every
   * candidate entry so the memory access pattern does not depend on idx.
   *
   * Entries 1..255 are each loaded, masked with an all-ones/all-zero compare
   * result and OR-ed into the accumulators. idx 0 matches none of them, so
   * the twelve words written are then all zero.
   *
   * r     Point to copy into (rdi). 48 bytes are written at offset 0 and
   *       48 bytes at offset 96.
   * table Table - start of the entries to access (rsi). Entries are 96 bytes
   *       apart and are read with unaligned loads, so the table does not
   *       have to be 16-byte aligned.
   * idx   Index of entry to retrieve (edx).
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_get_entry_256_6
  .type sp_384_get_entry_256_6,@function
  .align 16
  sp_384_get_entry_256_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_get_entry_256_6
  .p2align 4
  _sp_384_get_entry_256_6:
  #endif /* __APPLE__ */
          # xmm15 = {1,1,1,1} (counter step), xmm13 = {idx,idx,idx,idx}
          movq    $0x01, %rax
          movd    %edx, %xmm13
          # Skip entry 0: the walk starts at entry 1.
          addq    $0x60, %rsi
          movd    %eax, %xmm15
          # 255 entries remain to be visited.
          movq    $0xff, %rax
          pshufd  $0x00, %xmm15, %xmm15
          pshufd  $0x00, %xmm13, %xmm13
          # xmm0-xmm5 accumulate the selected entry.
          pxor    %xmm0, %xmm0
          pxor    %xmm1, %xmm1
          pxor    %xmm2, %xmm2
          pxor    %xmm3, %xmm3
          pxor    %xmm4, %xmm4
          pxor    %xmm5, %xmm5
          # xmm14 = running entry number, starting at 1.
          movdqa  %xmm15, %xmm14
  L_384_get_entry_256_6_start:
          # xmm12 = all ones when the running entry number equals idx, else zero.
          movdqa  %xmm14, %xmm12
          paddd   %xmm15, %xmm14
          pcmpeqd %xmm13, %xmm12
          # Unaligned loads: an aligned load would fault on a table that is
          # not 16-byte aligned.
          movdqu  (%rsi), %xmm6
          movdqu  16(%rsi), %xmm7
          movdqu  32(%rsi), %xmm8
          movdqu  48(%rsi), %xmm9
          movdqu  64(%rsi), %xmm10
          movdqu  80(%rsi), %xmm11
          addq    $0x60, %rsi
          pand    %xmm12, %xmm6
          pand    %xmm12, %xmm7
          pand    %xmm12, %xmm8
          pand    %xmm12, %xmm9
          pand    %xmm12, %xmm10
          pand    %xmm12, %xmm11
          por     %xmm6, %xmm0
          por     %xmm7, %xmm1
          por     %xmm8, %xmm2
          por     %xmm9, %xmm3
          por     %xmm10, %xmm4
          por     %xmm11, %xmm5
          decq    %rax
          jnz     L_384_get_entry_256_6_start
          # First 48 bytes of the entry go to offset 0 of r, the second 48 to offset 96.
          movdqu  %xmm0, (%rdi)
          movdqu  %xmm1, 16(%rdi)
          movdqu  %xmm2, 32(%rdi)
          movdqu  %xmm3, 96(%rdi)
          movdqu  %xmm4, 112(%rdi)
          movdqu  %xmm5, 128(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_get_entry_256_6,.-sp_384_get_entry_256_6
  #endif /* __APPLE__ */
  /* Copy one entry of a 256-entry table into a point, touching every
   * candidate entry so the memory access pattern does not depend on idx.
   * AVX2 variant.
   *
   * Entries 1..255 are each loaded, masked with an all-ones/all-zero compare
   * result and OR-ed into the accumulators. idx 0 matches none of them, so
   * the twelve words written are then all zero. The walk covers exactly 255
   * entries, the same count as sp_384_get_entry_256_6, so the last load ends
   * at the end of a 256-entry table.
   *
   * r     Point to copy into (rdi). 48 bytes are written at offset 0 and
   *       48 bytes at offset 96.
   * table Table - start of the entries to access (rsi). Entries are 96 bytes
   *       apart and are read with unaligned loads.
   * idx   Index of entry to retrieve (edx).
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_get_entry_256_avx2_6
  .type sp_384_get_entry_256_avx2_6,@function
  .align 16
  sp_384_get_entry_256_avx2_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_get_entry_256_avx2_6
  .p2align 4
  _sp_384_get_entry_256_avx2_6:
  #endif /* __APPLE__ */
          movq    $0x01, %rax
          movd    %edx, %xmm9
          # Skip entry 0: the walk starts at entry 1.
          addq    $0x60, %rsi
          movd    %eax, %xmm11
          # 255 entries remain; one more iteration would read past entry 255.
          movq    $0xff, %rax
          # An all-zero index vector makes vpermd broadcast dword 0:
          # ymm9 = idx in every lane, ymm11 = 1 in every lane.
          vpxor   %ymm10, %ymm10, %ymm10
          vpermd  %ymm9, %ymm10, %ymm9
          vpermd  %ymm11, %ymm10, %ymm11
          # ymm0/xmm1 accumulate the first 48 bytes, ymm2/xmm3 the second 48.
          vpxor   %ymm0, %ymm0, %ymm0
          vpxor   %xmm1, %xmm1, %xmm1
          vpxor   %ymm2, %ymm2, %ymm2
          vpxor   %xmm3, %xmm3, %xmm3
          # ymm10 = running entry number, starting at 1.
          vmovdqa %ymm11, %ymm10
  L_384_get_entry_256_avx2_6_start:
          # ymm8 = all ones when the running entry number equals idx, else zero.
          vpcmpeqd %ymm9, %ymm10, %ymm8
          vpaddd  %ymm11, %ymm10, %ymm10
          vmovdqu (%rsi), %ymm4
          vmovdqu 32(%rsi), %xmm5
          vmovdqu 48(%rsi), %ymm6
          vmovdqu 80(%rsi), %xmm7
          addq    $0x60, %rsi
          vpand   %ymm8, %ymm4, %ymm4
          vpand   %xmm8, %xmm5, %xmm5
          vpand   %ymm8, %ymm6, %ymm6
          vpand   %xmm8, %xmm7, %xmm7
          vpor    %ymm4, %ymm0, %ymm0
          vpor    %xmm5, %xmm1, %xmm1
          vpor    %ymm6, %ymm2, %ymm2
          vpor    %xmm7, %xmm3, %xmm3
          decq    %rax
          jnz     L_384_get_entry_256_avx2_6_start
          vmovdqu %ymm0, (%rdi)
          vmovdqu %xmm1, 32(%rdi)
          vmovdqu %ymm2, 96(%rdi)
          vmovdqu %xmm3, 128(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_get_entry_256_avx2_6,.-sp_384_get_entry_256_avx2_6
  #endif /* __APPLE__ */
  44010. #endif /* !WC_NO_CACHE_RESISTANT */
  /* Increment a six-word number in place. (a = a + 1)
   *
   * a  A single precision integer (rdi): six 64-bit words, least
   *    significant word first, updated directly in memory. A carry out of
   *    the top word is left in CF and is not returned.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_add_one_6
  .type sp_384_add_one_6,@function
  .align 16
  sp_384_add_one_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_add_one_6
  .p2align 4
  _sp_384_add_one_6:
  #endif /* __APPLE__ */
          # Add 1 to the low word, then ripple the carry through the other five.
          addq    $1, (%rdi)
          adcq    $0, 8(%rdi)
          adcq    $0, 16(%rdi)
          adcq    $0, 24(%rdi)
          adcq    $0, 32(%rdi)
          adcq    $0, 40(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_add_one_6,.-sp_384_add_one_6
  #endif /* __APPLE__ */
  /* Convert a big endian unsigned byte array into a six-word number.
   * Byte reversal is done with the bswap instruction.
   *
   * r    A single precision integer (rdi). Exactly six 64-bit words are
   *      produced when n <= 48: converted data first, then zero words.
   * size Maximum number of bytes to convert (rsi). This routine never reads
   *      it; the output length is fixed at 48 bytes.
   * a    Byte array (rdx).
   * n    Number of bytes in array to read (rcx).
   *
   * NOTE(review): nothing here bounds n. The 64-byte and 8-byte loops run
   * purely on n, so a value above 48 would store past r + 48 - confirm that
   * all callers pass n <= 48.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_from_bin_bswap
  .type sp_384_from_bin_bswap,@function
  .align 16
  sp_384_from_bin_bswap:
  #else
  .section __TEXT,__text
  .globl _sp_384_from_bin_bswap
  .p2align 4
  _sp_384_from_bin_bswap:
  #endif /* __APPLE__ */
          # r9 = a + n (one past the last input byte), r10 = r + 48 (end of
          # output), r11 = 0.
          leaq    (%rdx,%rcx), %r9
          leaq    48(%rdi), %r10
          xorq    %r11, %r11
          jmp     L_384_from_bin_bswap_64_end
          # 64 bytes per pass, walking the input backwards from its end.
  L_384_from_bin_bswap_64_start:
          subq    $0x40, %r9
          movq    56(%r9), %rax
          movq    48(%r9), %r8
          bswapq  %rax
          bswapq  %r8
          movq    %rax, (%rdi)
          movq    %r8, 8(%rdi)
          movq    40(%r9), %rax
          movq    32(%r9), %r8
          bswapq  %rax
          bswapq  %r8
          movq    %rax, 16(%rdi)
          movq    %r8, 24(%rdi)
          movq    24(%r9), %rax
          movq    16(%r9), %r8
          bswapq  %rax
          bswapq  %r8
          movq    %rax, 32(%rdi)
          movq    %r8, 40(%rdi)
          movq    8(%r9), %rax
          movq    (%r9), %r8
          bswapq  %rax
          bswapq  %r8
          movq    %rax, 48(%rdi)
          movq    %r8, 56(%rdi)
          addq    $0x40, %rdi
          subq    $0x40, %rcx
  L_384_from_bin_bswap_64_end:
          cmpq    $63, %rcx
          jg      L_384_from_bin_bswap_64_start
          jmp     L_384_from_bin_bswap_8_end
          # 8 bytes per pass.
  L_384_from_bin_bswap_8_start:
          subq    $8, %r9
          movq    (%r9), %rax
          bswapq  %rax
          movq    %rax, (%rdi)
          addq    $8, %rdi
          subq    $8, %rcx
  L_384_from_bin_bswap_8_end:
          cmpq    $7, %rcx
          jg      L_384_from_bin_bswap_8_start
          # Between 1 and 7 leading bytes may remain; they sit at the start of
          # a and form the most significant partial word.
          cmpq    %r11, %rcx
          je      L_384_from_bin_bswap_hi_end
          movq    %r11, %r8
          movq    %r11, %rax
  L_384_from_bin_bswap_hi_start:
          movb    (%rdx), %al
          shlq    $8, %r8
          incq    %rdx
          addq    %rax, %r8
          decq    %rcx
          jg      L_384_from_bin_bswap_hi_start
          movq    %r8, (%rdi)
          addq    $8, %rdi
  L_384_from_bin_bswap_hi_end:
          # Zero every output word not yet written, up to r + 48.
          cmpq    %r10, %rdi
          je      L_384_from_bin_bswap_zero_end
  L_384_from_bin_bswap_zero_start:
          movq    %r11, (%rdi)
          addq    $8, %rdi
          cmpq    %r10, %rdi
          jl      L_384_from_bin_bswap_zero_start
  L_384_from_bin_bswap_zero_end:
          repz retq
  #ifndef __APPLE__
  .size sp_384_from_bin_bswap,.-sp_384_from_bin_bswap
  #endif /* __APPLE__ */
  /* Convert a big endian unsigned byte array into a six-word number.
   * Byte reversal is done with the movbe instruction, which is an optional
   * instruction.
   *
   * r    A single precision integer (rdi). Exactly six 64-bit words are
   *      produced when n <= 48: converted data first, then zero words.
   * size Maximum number of bytes to convert (rsi). This routine never reads
   *      it; the output length is fixed at 48 bytes.
   * a    Byte array (rdx).
   * n    Number of bytes in array to read (rcx).
   *
   * NOTE(review): nothing here bounds n. The 64-byte and 8-byte loops run
   * purely on n, so a value above 48 would store past r + 48 - confirm that
   * all callers pass n <= 48.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_from_bin_movbe
  .type sp_384_from_bin_movbe,@function
  .align 16
  sp_384_from_bin_movbe:
  #else
  .section __TEXT,__text
  .globl _sp_384_from_bin_movbe
  .p2align 4
  _sp_384_from_bin_movbe:
  #endif /* __APPLE__ */
          # r9 = a + n (one past the last input byte), r10 = r + 48 (end of
          # output), r11 = 0.
          leaq    (%rdx,%rcx), %r9
          leaq    48(%rdi), %r10
          xorq    %r11, %r11
          jmp     L_384_from_bin_movbe_64_end
          # 64 bytes per pass, walking the input backwards from its end.
  L_384_from_bin_movbe_64_start:
          subq    $0x40, %r9
          movbeq  56(%r9), %rax
          movbeq  48(%r9), %r8
          movq    %rax, (%rdi)
          movq    %r8, 8(%rdi)
          movbeq  40(%r9), %rax
          movbeq  32(%r9), %r8
          movq    %rax, 16(%rdi)
          movq    %r8, 24(%rdi)
          movbeq  24(%r9), %rax
          movbeq  16(%r9), %r8
          movq    %rax, 32(%rdi)
          movq    %r8, 40(%rdi)
          movbeq  8(%r9), %rax
          movbeq  (%r9), %r8
          movq    %rax, 48(%rdi)
          movq    %r8, 56(%rdi)
          addq    $0x40, %rdi
          subq    $0x40, %rcx
  L_384_from_bin_movbe_64_end:
          cmpq    $63, %rcx
          jg      L_384_from_bin_movbe_64_start
          jmp     L_384_from_bin_movbe_8_end
          # 8 bytes per pass.
  L_384_from_bin_movbe_8_start:
          subq    $8, %r9
          movbeq  (%r9), %rax
          movq    %rax, (%rdi)
          addq    $8, %rdi
          subq    $8, %rcx
  L_384_from_bin_movbe_8_end:
          cmpq    $7, %rcx
          jg      L_384_from_bin_movbe_8_start
          # Between 1 and 7 leading bytes may remain; they sit at the start of
          # a and form the most significant partial word.
          cmpq    %r11, %rcx
          je      L_384_from_bin_movbe_hi_end
          movq    %r11, %r8
          movq    %r11, %rax
  L_384_from_bin_movbe_hi_start:
          movb    (%rdx), %al
          shlq    $8, %r8
          incq    %rdx
          addq    %rax, %r8
          decq    %rcx
          jg      L_384_from_bin_movbe_hi_start
          movq    %r8, (%rdi)
          addq    $8, %rdi
  L_384_from_bin_movbe_hi_end:
          # Zero every output word not yet written, up to r + 48.
          cmpq    %r10, %rdi
          je      L_384_from_bin_movbe_zero_end
  L_384_from_bin_movbe_zero_start:
          movq    %r11, (%rdi)
          addq    $8, %rdi
          cmpq    %r10, %rdi
          jl      L_384_from_bin_movbe_zero_start
  L_384_from_bin_movbe_zero_end:
          repz retq
  #ifndef __APPLE__
  .size sp_384_from_bin_movbe,.-sp_384_from_bin_movbe
  #endif /* __APPLE__ */
  /* Serialise a six-word number as 48 big endian bytes.
   * The length is fixed: exactly 48 bytes are written.
   * Byte reversal is done with the bswap instruction.
   *
   * r  A single precision integer (rdi), six 64-bit words, least
   *    significant word first.
   * a  Byte array (rsi) receiving the 48 bytes, most significant byte first.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_to_bin_bswap
  .type sp_384_to_bin_bswap,@function
  .align 16
  sp_384_to_bin_bswap:
  #else
  .section __TEXT,__text
  .globl _sp_384_to_bin_bswap
  .p2align 4
  _sp_384_to_bin_bswap:
  #endif /* __APPLE__ */
          # Words 5 and 4 become output bytes 0..15.
          movq    32(%rdi), %rax
          movq    40(%rdi), %rdx
          bswapq  %rax
          bswapq  %rdx
          movq    %rdx, (%rsi)
          movq    %rax, 8(%rsi)
          # Words 3 and 2 become output bytes 16..31.
          movq    16(%rdi), %rax
          movq    24(%rdi), %rdx
          bswapq  %rax
          bswapq  %rdx
          movq    %rdx, 16(%rsi)
          movq    %rax, 24(%rsi)
          # Words 1 and 0 become output bytes 32..47.
          movq    (%rdi), %rax
          movq    8(%rdi), %rdx
          bswapq  %rax
          bswapq  %rdx
          movq    %rdx, 32(%rsi)
          movq    %rax, 40(%rsi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_to_bin_bswap,.-sp_384_to_bin_bswap
  #endif /* __APPLE__ */
  /* Serialise a six-word number as 48 big endian bytes.
   * The length is fixed: exactly 48 bytes are written.
   * Byte reversal is done with the movbe instruction, which is optional.
   *
   * r  A single precision integer (rdi), six 64-bit words, least
   *    significant word first.
   * a  Byte array (rsi) receiving the 48 bytes, most significant byte first.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_to_bin_movbe
  .type sp_384_to_bin_movbe,@function
  .align 16
  sp_384_to_bin_movbe:
  #else
  .section __TEXT,__text
  .globl _sp_384_to_bin_movbe
  .p2align 4
  _sp_384_to_bin_movbe:
  #endif /* __APPLE__ */
          # Words 5 and 4 become output bytes 0..15.
          movbeq  32(%rdi), %rax
          movbeq  40(%rdi), %rdx
          movq    %rdx, (%rsi)
          movq    %rax, 8(%rsi)
          # Words 3 and 2 become output bytes 16..31.
          movbeq  16(%rdi), %rax
          movbeq  24(%rdi), %rdx
          movq    %rdx, 16(%rsi)
          movq    %rax, 24(%rsi)
          # Words 1 and 0 become output bytes 32..47.
          movbeq  (%rdi), %rax
          movbeq  8(%rdi), %rdx
          movq    %rdx, 32(%rsi)
          movq    %rax, 40(%rsi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_to_bin_movbe,.-sp_384_to_bin_movbe
  #endif /* __APPLE__ */
  /* Subtract b from a, storing the difference back into a. (a -= b)
   *
   * a  A single precision integer and result (rdi), six 64-bit words.
   * b  A single precision integer (rsi), six 64-bit words.
   *
   * Returns in rax the final borrow as a mask: 0 when no borrow left the
   * top word, all ones when one did.
   * All six words of b are loaded before the first word of a is modified.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_sub_in_place_6
  .type sp_384_sub_in_place_6,@function
  .align 16
  sp_384_sub_in_place_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_sub_in_place_6
  .p2align 4
  _sp_384_sub_in_place_6:
  #endif /* __APPLE__ */
          movq    (%rsi), %rdx
          movq    8(%rsi), %rcx
          movq    16(%rsi), %r8
          movq    24(%rsi), %r9
          movq    32(%rsi), %r10
          movq    40(%rsi), %r11
          # Word-by-word subtraction in memory with the borrow rippling upward.
          subq    %rdx, (%rdi)
          sbbq    %rcx, 8(%rdi)
          sbbq    %r8, 16(%rdi)
          sbbq    %r9, 24(%rdi)
          sbbq    %r10, 32(%rdi)
          sbbq    %r11, 40(%rdi)
          # rax = rax - rax - CF, which is 0 or all ones whatever rax held.
          sbbq    %rax, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_384_sub_in_place_6,.-sp_384_sub_in_place_6
  #endif /* __APPLE__ */
  /* Subtract b from a into r when the mask m is set, else copy a into r.
   * m is -1 to subtract and 0 when not subtracting.
   *
   * r  A single precision number (rdi) receiving the six result words.
   * a  A single precision number to subtract from (rsi).
   * b  A single precision number to subtract (rdx).
   * m  Mask value to apply (rcx).
   *
   * Each word of b is passed through pext with m as the mask: an all-ones
   * mask returns the word unchanged and a zero mask returns 0. Other mask
   * values would compact bits rather than select the word.
   * Returns in rax the final borrow as a mask: 0 or all ones.
   * Uses pext (BMI2).
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_cond_sub_avx2_6
  .type sp_384_cond_sub_avx2_6,@function
  .align 16
  sp_384_cond_sub_avx2_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_cond_sub_avx2_6
  .p2align 4
  _sp_384_cond_sub_avx2_6:
  #endif /* __APPLE__ */
          # Word 0
          movq    (%rdx), %r10
          movq    (%rsi), %r8
          pextq   %rcx, %r10, %r10
          subq    %r10, %r8
          # Word 1 (loads, pext and stores leave the borrow in CF untouched)
          movq    8(%rdx), %r10
          movq    8(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, (%rdi)
          sbbq    %r10, %r9
          # Word 2
          movq    16(%rdx), %r8
          movq    16(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 8(%rdi)
          sbbq    %r8, %r10
          # Word 3
          movq    24(%rdx), %r9
          movq    24(%rsi), %r8
          pextq   %rcx, %r9, %r9
          movq    %r10, 16(%rdi)
          sbbq    %r9, %r8
          # Word 4
          movq    32(%rdx), %r10
          movq    32(%rsi), %r9
          pextq   %rcx, %r10, %r10
          movq    %r8, 24(%rdi)
          sbbq    %r10, %r9
          # Word 5
          movq    40(%rdx), %r8
          movq    40(%rsi), %r10
          pextq   %rcx, %r8, %r8
          movq    %r9, 32(%rdi)
          sbbq    %r8, %r10
          movq    %r10, 40(%rdi)
          # rax = rax - rax - CF, which is 0 or all ones whatever rax held.
          sbbq    %rax, %rax
          repz retq
  #ifndef __APPLE__
  .size sp_384_cond_sub_avx2_6,.-sp_384_cond_sub_avx2_6
  #endif /* __APPLE__ */
  /* Multiply a by the single digit b. (r = a * b)
   *
   * r  Product: seven 64-bit words, least significant word first.
   * a  Multiplicand: six 64-bit words.
   * b  Multiplier: one 64-bit digit.
   *
   * Registers on entry: rdi = r, rsi = a, rdx = b.
   * Clobbers rax, rcx, rdx, r8 and the flags.
   *
   * Each step forms the 128-bit product A[i] * B in rdx:rax, adds the high
   * word carried over from the previous step, stores the low word, and
   * keeps the new high word in r8 for the next step.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_mul_d_6
  .type sp_384_mul_d_6,@function
  .align 16
  sp_384_mul_d_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_mul_d_6
  .p2align 4
  _sp_384_mul_d_6:
  #endif /* __APPLE__ */
          /* mulq overwrites rdx, so the digit is kept in rcx. */
          movq    %rdx, %rcx
          # A[0] * B
          movq    (%rsi), %rax
          mulq    %rcx
          movq    %rax, (%rdi)
          movq    %rdx, %r8
          # A[1] * B
          movq    8(%rsi), %rax
          mulq    %rcx
          addq    %r8, %rax
          adcq    $0x00, %rdx
          movq    %rax, 8(%rdi)
          movq    %rdx, %r8
          # A[2] * B
          movq    16(%rsi), %rax
          mulq    %rcx
          addq    %r8, %rax
          adcq    $0x00, %rdx
          movq    %rax, 16(%rdi)
          movq    %rdx, %r8
          # A[3] * B
          movq    24(%rsi), %rax
          mulq    %rcx
          addq    %r8, %rax
          adcq    $0x00, %rdx
          movq    %rax, 24(%rdi)
          movq    %rdx, %r8
          # A[4] * B
          movq    32(%rsi), %rax
          mulq    %rcx
          addq    %r8, %rax
          adcq    $0x00, %rdx
          movq    %rax, 32(%rdi)
          movq    %rdx, %r8
          # A[5] * B
          movq    40(%rsi), %rax
          mulq    %rcx
          addq    %r8, %rax
          adcq    $0x00, %rdx
          movq    %rax, 40(%rdi)
          /* The final high word is the seventh word of the result. */
          movq    %rdx, 48(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_mul_d_6,.-sp_384_mul_d_6
  #endif /* __APPLE__ */
  #ifdef HAVE_INTEL_AVX2
  /* Multiply a by the single digit b using mulx. (r = a * b)
   *
   * r  Product: seven 64-bit words, least significant word first.
   * a  Multiplicand: six 64-bit words.
   * b  Multiplier: one 64-bit digit.
   *
   * Registers on entry: rdi = r, rsi = a, rdx = b.
   * Clobbers rax, r8, r9 and the flags.
   *
   * mulx takes one factor implicitly from rdx and leaves the flags alone,
   * so b stays in rdx throughout and a single carry chain runs across all
   * six partial products. r8 and r9 alternate as the register holding the
   * high word that is still waiting for the next low word to be added.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_mul_d_avx2_6
  .type sp_384_mul_d_avx2_6,@function
  .align 16
  sp_384_mul_d_avx2_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_mul_d_avx2_6
  .p2align 4
  _sp_384_mul_d_avx2_6:
  #endif /* __APPLE__ */
          # A[0] * B
          mulxq   (%rsi), %rax, %r8
          movq    %rax, (%rdi)
          # A[1] * B
          mulxq   8(%rsi), %rax, %r9
          addq    %rax, %r8
          movq    %r8, 8(%rdi)
          # A[2] * B
          mulxq   16(%rsi), %rax, %r8
          adcq    %rax, %r9
          movq    %r9, 16(%rdi)
          # A[3] * B
          mulxq   24(%rsi), %rax, %r9
          adcq    %rax, %r8
          movq    %r8, 24(%rdi)
          # A[4] * B
          mulxq   32(%rsi), %rax, %r8
          adcq    %rax, %r9
          movq    %r9, 32(%rdi)
          # A[5] * B
          mulxq   40(%rsi), %rax, %r9
          adcq    %rax, %r8
          movq    %r8, 40(%rdi)
          /* Fold the last carry into the top word. */
          adcq    $0x00, %r9
          movq    %r9, 48(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_mul_d_avx2_6,.-sp_384_mul_d_avx2_6
  #endif /* __APPLE__ */
  #endif /* HAVE_INTEL_AVX2 */
  /* Shift a six-word number right by one bit. (r = a >> 1)
   *
   * r  Result: six 64-bit words, least significant word first.
   * a  Number to shift: six 64-bit words.
   *
   * Registers on entry: rdi = r, rsi = a.
   * Clobbers rax, rcx, rdx, r8-r10 and the flags.
   *
   * The top bit of the result is always zero; bit 0 of a is discarded.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_rshift1_6
  .type sp_384_rshift1_6,@function
  .align 16
  sp_384_rshift1_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_rshift1_6
  .p2align 4
  _sp_384_rshift1_6:
  #endif /* __APPLE__ */
          /* Fetch all of a before any word of r is written. */
          movq    (%rsi), %rax
          movq    8(%rsi), %rcx
          movq    16(%rsi), %rdx
          movq    24(%rsi), %r8
          movq    32(%rsi), %r9
          movq    40(%rsi), %r10
          /* Work upward from the bottom word: each shrd pulls bit 0 of the
           * next higher word, which is still unshifted, into bit 63. */
          shrdq   $0x01, %rcx, %rax
          movq    %rax, (%rdi)
          shrdq   $0x01, %rdx, %rcx
          movq    %rcx, 8(%rdi)
          shrdq   $0x01, %r8, %rdx
          movq    %rdx, 16(%rdi)
          shrdq   $0x01, %r9, %r8
          movq    %r8, 24(%rdi)
          shrdq   $0x01, %r10, %r9
          movq    %r9, 32(%rdi)
          shrq    $0x01, %r10
          movq    %r10, 40(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_rshift1_6,.-sp_384_rshift1_6
  #endif /* __APPLE__ */
  /* Divide the number by 2 mod the prime. (r = a / 2 % m)
   *
   * r Result of division by 2.
   * a Number to divide.
   * m Modulus
   *
   * Registers on entry: rdi = r, rsi = a, rdx = m. All three point at six
   * 64-bit words, least significant word first.
   * Clobbers rax, rcx, rsi, r8-r11 and the flags. No stack is used.
   *
   * When a is even it is simply shifted right one bit. When a is odd, m is
   * added first and the 385-bit sum, carry included, is shifted right one
   * bit (this halves exactly only when m is odd). The choice is made with
   * a branch on bit 0 of a, so the run time depends on that bit.
   *
   * m is added straight from memory, which leaves enough caller-saved
   * registers that nothing has to be pushed.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_div2_mod_6
  .type sp_384_div2_mod_6,@function
  .align 16
  sp_384_div2_mod_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_div2_mod_6
  .p2align 4
  _sp_384_div2_mod_6:
  #endif /* __APPLE__ */
          movq    (%rsi), %rax
          movq    8(%rsi), %rcx
          movq    16(%rsi), %r8
          movq    24(%rsi), %r9
          movq    32(%rsi), %r10
          movq    40(%rsi), %r11
          /* The a pointer is dead from here on; rsi becomes bit 384 of the
           * value to shift. It stays zero when nothing is added. */
          xorl    %esi, %esi
          testb   $0x01, %al
          je      L_384_mod_inv_6_div2_mod_no_add
          addq    (%rdx), %rax
          adcq    8(%rdx), %rcx
          adcq    16(%rdx), %r8
          adcq    24(%rdx), %r9
          adcq    32(%rdx), %r10
          adcq    40(%rdx), %r11
          /* Capture the carry out of the top word. */
          adcq    $0x00, %rsi
  L_384_mod_inv_6_div2_mod_no_add:
          shrdq   $0x01, %rcx, %rax
          shrdq   $0x01, %r8, %rcx
          shrdq   $0x01, %r9, %r8
          shrdq   $0x01, %r10, %r9
          shrdq   $0x01, %r11, %r10
          shrdq   $0x01, %rsi, %r11
          movq    %rax, (%rdi)
          movq    %rcx, 8(%rdi)
          movq    %r8, 16(%rdi)
          movq    %r9, 24(%rdi)
          movq    %r10, 32(%rdi)
          movq    %r11, 40(%rdi)
          repz retq
  #ifndef __APPLE__
  .size sp_384_div2_mod_6,.-sp_384_div2_mod_6
  #endif /* __APPLE__ */
  /* Count the significant bits of a six-word number.
   *
   * Register on entry: rdi = number, six 64-bit words, least significant
   * word first.
   * Result in rax: one more than the index of the highest set bit, or 0
   * when every word is zero.
   * Clobbers rdx and the flags.
   *
   * Words are examined from the most significant down. For the first
   * non-zero word, rax is preloaded with 64 * word index + 1 and the shared
   * tail adds the bit index found by bsr.
   */
  #ifndef __APPLE__
  .text
  .globl sp_384_num_bits_6
  .type sp_384_num_bits_6,@function
  .align 16
  sp_384_num_bits_6:
  #else
  .section __TEXT,__text
  .globl _sp_384_num_bits_6
  .p2align 4
  _sp_384_num_bits_6:
  #endif /* __APPLE__ */
          /* Word 5: bits 320..383 */
          movl    $0x141, %eax
          movq    40(%rdi), %rdx
          testq   %rdx, %rdx
          jne     L_384_num_bits_6_found
          /* Word 4: bits 256..319 */
          movl    $0x101, %eax
          movq    32(%rdi), %rdx
          testq   %rdx, %rdx
          jne     L_384_num_bits_6_found
          /* Word 3: bits 192..255 */
          movl    $0xc1, %eax
          movq    24(%rdi), %rdx
          testq   %rdx, %rdx
          jne     L_384_num_bits_6_found
          /* Word 2: bits 128..191 */
          movl    $0x81, %eax
          movq    16(%rdi), %rdx
          testq   %rdx, %rdx
          jne     L_384_num_bits_6_found
          /* Word 1: bits 64..127 */
          movl    $0x41, %eax
          movq    8(%rdi), %rdx
          testq   %rdx, %rdx
          jne     L_384_num_bits_6_found
          /* Word 0: bits 0..63 */
          movl    $0x01, %eax
          movq    (%rdi), %rdx
          testq   %rdx, %rdx
          jne     L_384_num_bits_6_found
          /* Every word was zero. */
          xorl    %eax, %eax
          jmp     L_384_num_bits_6_done
  L_384_num_bits_6_found:
          /* rdx is non-zero here, so bsr yields a defined bit index. */
          bsrq    %rdx, %rdx
          addq    %rdx, %rax
  L_384_num_bits_6_done:
          repz retq
  #ifndef __APPLE__
  .size sp_384_num_bits_6,.-sp_384_num_bits_6
  #endif /* __APPLE__ */
  44689. #endif /* WOLFSSL_SP_384 */
  44690. #if defined(__linux__) && defined(__ELF__)
  44691. .section .note.GNU-stack,"",%progbits
  44692. #endif