aes_gcm_asm.asm 497 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
5422154231542415425154261542715428154291543015431154321543315434154351543615437154381543915440154411544215443154441544515446154471544815449154501545115452154531545415455154561545715458154591546015461154621546315464154651546615467154681546915470154711547215473154741547515476154771547815479154801548115482154831548415485154861548715488154891549015491154921549315494154951549615497154981549915500155011550215503155041550515506155071550815509155101551115512155131551415515155161551715518155191552015521155221552315524155251552615527155281552915530155311553215533155341553515536155371553815539155401554115542155431554415545155461554715548155491555015551155521555315554155551555615557155581555915560155611556215563155641556515566155671556815569155701557115572155731557415575155761557715578155791558015581155821558315584155851558615587155881558915590155911559215593155941559515596155971559815599156001560115602156031560415605156061560715608156091561015611156121561315614156151561615617156181561915620156211562215623156241562515626156271562815629156301563115632156331563415635156361563715638156391564015641156421564315644156451564615647156481564915650156511565215653156541565515656156571565815659156601566115662156631566415665156661566715668156691567015671156721567315674156751567615677156781567915680156811568215683156841568515686156871568815689156901569115692156931569415695156961569715698156991570015701157021570315704157051570615707157081570915710157111571215713157141571515716157171571815719157201572115722157231572415725157261572715728157291573015731157321573315734157351573615737157381573915740157411574215743157441574515746157471574815749157501575115752157531575415755157561575715758157591576015761157621576315764157651576615767157681576915770157711577215773157741577515776157771577815779157801578115782157831578415785157861578715788157891579015791
  1. ; /* aes_gcm_asm.asm */
  2. ; /*
  3. ; * Copyright (C) 2006-2023 wolfSSL Inc.
  4. ; *
  5. ; * This file is part of wolfSSL.
  6. ; *
  7. ; * wolfSSL is free software; you can redistribute it and/or modify
  8. ; * it under the terms of the GNU General Public License as published by
  9. ; * the Free Software Foundation; either version 2 of the License, or
  10. ; * (at your option) any later version.
  11. ; *
  12. ; * wolfSSL is distributed in the hope that it will be useful,
  13. ; * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ; * GNU General Public License for more details.
  16. ; *
  17. ; * You should have received a copy of the GNU General Public License
  18. ; * along with this program; if not, write to the Free Software
  19. ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  20. ; */
  21. IF @Version LT 1200 ; assembler version gate: when @Version is below 1200, opt out of newer instruction sets (NOTE(review): 1200 presumably means ML 12.00 - confirm)
  22. ; AVX2 instructions not recognized by old versions of MASM
  23. IFNDEF NO_AVX2_SUPPORT ; keep any value the build already supplied
  24. NO_AVX2_SUPPORT = 1 ; checked below: HAVE_INTEL_AVX2 is only defined when this symbol is absent
  25. ENDIF
  26. ; MOVBE instruction not recognized by old versions of MASM
  27. IFNDEF NO_MOVBE_SUPPORT ; keep any value the build already supplied
  28. NO_MOVBE_SUPPORT = 1 ; NOTE(review): presumably tested by code later in the file to avoid emitting MOVBE - confirm
  29. ENDIF
  30. ENDIF
  31. IFNDEF HAVE_INTEL_AVX1 ; default feature symbol: defined as 1 unless the build already defined it
  32. HAVE_INTEL_AVX1 = 1 ; NOTE(review): presumably gates the AVX1 routines later in the file - confirm
  33. ENDIF
  34. IFNDEF NO_AVX2_SUPPORT ; AVX2 is enabled only when nothing (build flags or the @Version check) opted out
  35. HAVE_INTEL_AVX2 = 1 ; NOTE(review): presumably gates the AVX2 routines later in the file - confirm
  36. ENDIF
  37. IFNDEF _WIN64 ; define _WIN64 as 1 when the build did not provide it
  38. _WIN64 = 1
  39. ENDIF
  40. _DATA SEGMENT ; 128-bit counter increments 1..8; each constant is followed by a QWORD holding its own address
  41. ALIGN 16 ; the constants are used directly as 128-bit memory operands of paddd in AES_GCM_encrypt_aesni
  42. L_aes_gcm_one QWORD 0, 1 ; low qword 0, high qword 1: paddd adds 1 to 32-bit lane 2 of a counter already shuffled with L_aes_gcm_bswap_epi64
  43. ptr_L_aes_gcm_one QWORD L_aes_gcm_one ; address of the constant (NOTE(review): the ptr_ symbols are not used by AES_GCM_encrypt_aesni as far as reviewed - confirm where they are needed)
  44. _DATA ENDS
  45. _DATA SEGMENT
  46. ALIGN 16
  47. L_aes_gcm_two QWORD 0, 2 ; counter + 2 (second block of an 8-block batch)
  48. ptr_L_aes_gcm_two QWORD L_aes_gcm_two ; address of the constant
  49. _DATA ENDS
  50. _DATA SEGMENT
  51. ALIGN 16
  52. L_aes_gcm_three QWORD 0, 3 ; counter + 3
  53. ptr_L_aes_gcm_three QWORD L_aes_gcm_three ; address of the constant
  54. _DATA ENDS
  55. _DATA SEGMENT
  56. ALIGN 16
  57. L_aes_gcm_four QWORD 0, 4 ; counter + 4
  58. ptr_L_aes_gcm_four QWORD L_aes_gcm_four ; address of the constant
  59. _DATA ENDS
  60. _DATA SEGMENT
  61. ALIGN 16
  62. L_aes_gcm_five QWORD 0, 5 ; counter + 5
  63. ptr_L_aes_gcm_five QWORD L_aes_gcm_five ; address of the constant
  64. _DATA ENDS
  65. _DATA SEGMENT
  66. ALIGN 16
  67. L_aes_gcm_six QWORD 0, 6 ; counter + 6
  68. ptr_L_aes_gcm_six QWORD L_aes_gcm_six ; address of the constant
  69. _DATA ENDS
  70. _DATA SEGMENT
  71. ALIGN 16
  72. L_aes_gcm_seven QWORD 0, 7 ; counter + 7
  73. ptr_L_aes_gcm_seven QWORD L_aes_gcm_seven ; address of the constant
  74. _DATA ENDS
  75. _DATA SEGMENT
  76. ALIGN 16
  77. L_aes_gcm_eight QWORD 0, 8 ; counter + 8: advances the saved counter past a full 8-block (128-byte) batch
  78. ptr_L_aes_gcm_eight QWORD L_aes_gcm_eight ; address of the constant
  79. _DATA ENDS
  80. _DATA SEGMENT ; pshufb byte-permutation masks
  81. ALIGN 16 ; used as a 128-bit memory operand of pshufb / movdqa
  82. L_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567 ; = 0x0001020304050607, 0x08090A0B0C0D0E0F: as a pshufb mask this reverses the bytes inside each 64-bit half
  83. ptr_L_aes_gcm_bswap_epi64 QWORD L_aes_gcm_bswap_epi64 ; address of the mask
  84. _DATA ENDS
  85. _DATA SEGMENT
  86. ALIGN 16 ; used as a 128-bit memory operand of pshufb
  87. L_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183 ; = 0x08090A0B0C0D0E0F, 0x0001020304050607: as a pshufb mask this reverses all 16 bytes of the register
  88. ptr_L_aes_gcm_bswap_mask QWORD L_aes_gcm_bswap_mask ; address of the mask
  89. _DATA ENDS
  90. _DATA SEGMENT ; constant folded into the hash key H during setup
  91. ALIGN 16 ; used as a 128-bit memory operand of pand
  92. L_aes_gcm_mod2_128 QWORD 1, 13979173243358019584 ; = 1, 0xC200000000000000: ANDed with a mask built from the top bit of H, then XORed into H shifted left by one (NOTE(review): presumably the GHASH reduction constant - confirm)
  93. ptr_L_aes_gcm_mod2_128 QWORD L_aes_gcm_mod2_128 ; address of the constant
  94. _DATA ENDS
  95. _text SEGMENT READONLY PARA
  96. AES_GCM_encrypt_aesni PROC
  97. push r13
  98. push rdi
  99. push rsi
  100. push r12
  101. push rbx
  102. push r14
  103. push r15
  104. mov rdi, rcx
  105. mov rsi, rdx
  106. mov r12, r8
  107. mov rax, r9
  108. mov r8, QWORD PTR [rsp+96]
  109. mov r9d, DWORD PTR [rsp+104]
  110. mov r11d, DWORD PTR [rsp+112]
  111. mov ebx, DWORD PTR [rsp+120]
  112. mov r14d, DWORD PTR [rsp+128]
  113. mov r15, QWORD PTR [rsp+136]
  114. mov r10d, DWORD PTR [rsp+144]
  115. sub rsp, 320
  116. movdqu [rsp+160], xmm6
  117. movdqu [rsp+176], xmm7
  118. movdqu [rsp+192], xmm8
  119. movdqu [rsp+208], xmm9
  120. movdqu [rsp+224], xmm10
  121. movdqu [rsp+240], xmm11
  122. movdqu [rsp+256], xmm12
  123. movdqu [rsp+272], xmm13
  124. movdqu [rsp+288], xmm14
  125. movdqu [rsp+304], xmm15
  126. pxor xmm4, xmm4
  127. pxor xmm6, xmm6
  128. cmp ebx, 12
  129. mov edx, ebx
  130. jne L_AES_GCM_encrypt_aesni_iv_not_12
  131. ; # Calculate values when IV is 12 bytes
  132. ; Set counter based on IV
  133. mov ecx, 16777216
  134. pinsrq xmm4, QWORD PTR [rax], 0
  135. pinsrd xmm4, DWORD PTR [rax+8], 2
  136. pinsrd xmm4, ecx, 3
  137. ; H = Encrypt X(=0) and T = Encrypt counter
  138. movdqa xmm1, xmm4
  139. movdqa xmm5, OWORD PTR [r15]
  140. pxor xmm1, xmm5
  141. movdqa xmm7, OWORD PTR [r15+16]
  142. aesenc xmm5, xmm7
  143. aesenc xmm1, xmm7
  144. movdqa xmm7, OWORD PTR [r15+32]
  145. aesenc xmm5, xmm7
  146. aesenc xmm1, xmm7
  147. movdqa xmm7, OWORD PTR [r15+48]
  148. aesenc xmm5, xmm7
  149. aesenc xmm1, xmm7
  150. movdqa xmm7, OWORD PTR [r15+64]
  151. aesenc xmm5, xmm7
  152. aesenc xmm1, xmm7
  153. movdqa xmm7, OWORD PTR [r15+80]
  154. aesenc xmm5, xmm7
  155. aesenc xmm1, xmm7
  156. movdqa xmm7, OWORD PTR [r15+96]
  157. aesenc xmm5, xmm7
  158. aesenc xmm1, xmm7
  159. movdqa xmm7, OWORD PTR [r15+112]
  160. aesenc xmm5, xmm7
  161. aesenc xmm1, xmm7
  162. movdqa xmm7, OWORD PTR [r15+128]
  163. aesenc xmm5, xmm7
  164. aesenc xmm1, xmm7
  165. movdqa xmm7, OWORD PTR [r15+144]
  166. aesenc xmm5, xmm7
  167. aesenc xmm1, xmm7
  168. cmp r10d, 11
  169. movdqa xmm7, OWORD PTR [r15+160]
  170. jl L_AES_GCM_encrypt_aesni_calc_iv_12_last
  171. aesenc xmm5, xmm7
  172. aesenc xmm1, xmm7
  173. movdqa xmm7, OWORD PTR [r15+176]
  174. aesenc xmm5, xmm7
  175. aesenc xmm1, xmm7
  176. cmp r10d, 13
  177. movdqa xmm7, OWORD PTR [r15+192]
  178. jl L_AES_GCM_encrypt_aesni_calc_iv_12_last
  179. aesenc xmm5, xmm7
  180. aesenc xmm1, xmm7
  181. movdqa xmm7, OWORD PTR [r15+208]
  182. aesenc xmm5, xmm7
  183. aesenc xmm1, xmm7
  184. movdqa xmm7, OWORD PTR [r15+224]
  185. L_AES_GCM_encrypt_aesni_calc_iv_12_last:
  186. aesenclast xmm5, xmm7
  187. aesenclast xmm1, xmm7
  188. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  189. movdqu [rsp+144], xmm1
  190. jmp L_AES_GCM_encrypt_aesni_iv_done
  191. L_AES_GCM_encrypt_aesni_iv_not_12:
  192. ; Calculate values when IV is not 12 bytes
  193. ; H = Encrypt X(=0)
  194. movdqa xmm5, OWORD PTR [r15]
  195. aesenc xmm5, [r15+16]
  196. aesenc xmm5, [r15+32]
  197. aesenc xmm5, [r15+48]
  198. aesenc xmm5, [r15+64]
  199. aesenc xmm5, [r15+80]
  200. aesenc xmm5, [r15+96]
  201. aesenc xmm5, [r15+112]
  202. aesenc xmm5, [r15+128]
  203. aesenc xmm5, [r15+144]
  204. cmp r10d, 11
  205. movdqa xmm9, OWORD PTR [r15+160]
  206. jl L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last
  207. aesenc xmm5, xmm9
  208. aesenc xmm5, [r15+176]
  209. cmp r10d, 13
  210. movdqa xmm9, OWORD PTR [r15+192]
  211. jl L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last
  212. aesenc xmm5, xmm9
  213. aesenc xmm5, [r15+208]
  214. movdqa xmm9, OWORD PTR [r15+224]
  215. L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last:
  216. aesenclast xmm5, xmm9
  217. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  218. ; Calc counter
  219. ; Initialization vector
  220. cmp edx, 0
  221. mov rcx, 0
  222. je L_AES_GCM_encrypt_aesni_calc_iv_done
  223. cmp edx, 16
  224. jl L_AES_GCM_encrypt_aesni_calc_iv_lt16
  225. and edx, 4294967280
  226. L_AES_GCM_encrypt_aesni_calc_iv_16_loop:
  227. movdqu xmm8, [rax+rcx]
  228. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  229. pxor xmm4, xmm8
  230. pshufd xmm1, xmm4, 78
  231. pshufd xmm2, xmm5, 78
  232. movdqa xmm3, xmm5
  233. movdqa xmm0, xmm5
  234. pclmulqdq xmm3, xmm4, 17
  235. pclmulqdq xmm0, xmm4, 0
  236. pxor xmm1, xmm4
  237. pxor xmm2, xmm5
  238. pclmulqdq xmm1, xmm2, 0
  239. pxor xmm1, xmm0
  240. pxor xmm1, xmm3
  241. movdqa xmm2, xmm1
  242. movdqa xmm7, xmm0
  243. movdqa xmm4, xmm3
  244. pslldq xmm2, 8
  245. psrldq xmm1, 8
  246. pxor xmm7, xmm2
  247. pxor xmm4, xmm1
  248. movdqa xmm0, xmm7
  249. movdqa xmm1, xmm4
  250. psrld xmm0, 31
  251. psrld xmm1, 31
  252. pslld xmm7, 1
  253. pslld xmm4, 1
  254. movdqa xmm2, xmm0
  255. pslldq xmm0, 4
  256. psrldq xmm2, 12
  257. pslldq xmm1, 4
  258. por xmm4, xmm2
  259. por xmm7, xmm0
  260. por xmm4, xmm1
  261. movdqa xmm0, xmm7
  262. movdqa xmm1, xmm7
  263. movdqa xmm2, xmm7
  264. pslld xmm0, 31
  265. pslld xmm1, 30
  266. pslld xmm2, 25
  267. pxor xmm0, xmm1
  268. pxor xmm0, xmm2
  269. movdqa xmm1, xmm0
  270. psrldq xmm1, 4
  271. pslldq xmm0, 12
  272. pxor xmm7, xmm0
  273. movdqa xmm2, xmm7
  274. movdqa xmm3, xmm7
  275. movdqa xmm0, xmm7
  276. psrld xmm2, 1
  277. psrld xmm3, 2
  278. psrld xmm0, 7
  279. pxor xmm2, xmm3
  280. pxor xmm2, xmm0
  281. pxor xmm2, xmm1
  282. pxor xmm2, xmm7
  283. pxor xmm4, xmm2
  284. add ecx, 16
  285. cmp ecx, edx
  286. jl L_AES_GCM_encrypt_aesni_calc_iv_16_loop
  287. mov edx, ebx
  288. cmp ecx, edx
  289. je L_AES_GCM_encrypt_aesni_calc_iv_done
  290. L_AES_GCM_encrypt_aesni_calc_iv_lt16:
  291. sub rsp, 16
  292. pxor xmm8, xmm8
  293. xor ebx, ebx
  294. movdqu [rsp], xmm8
  295. L_AES_GCM_encrypt_aesni_calc_iv_loop:
  296. movzx r13d, BYTE PTR [rax+rcx]
  297. mov BYTE PTR [rsp+rbx], r13b
  298. inc ecx
  299. inc ebx
  300. cmp ecx, edx
  301. jl L_AES_GCM_encrypt_aesni_calc_iv_loop
  302. movdqu xmm8, [rsp]
  303. add rsp, 16
  304. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  305. pxor xmm4, xmm8
  306. pshufd xmm1, xmm4, 78
  307. pshufd xmm2, xmm5, 78
  308. movdqa xmm3, xmm5
  309. movdqa xmm0, xmm5
  310. pclmulqdq xmm3, xmm4, 17
  311. pclmulqdq xmm0, xmm4, 0
  312. pxor xmm1, xmm4
  313. pxor xmm2, xmm5
  314. pclmulqdq xmm1, xmm2, 0
  315. pxor xmm1, xmm0
  316. pxor xmm1, xmm3
  317. movdqa xmm2, xmm1
  318. movdqa xmm7, xmm0
  319. movdqa xmm4, xmm3
  320. pslldq xmm2, 8
  321. psrldq xmm1, 8
  322. pxor xmm7, xmm2
  323. pxor xmm4, xmm1
  324. movdqa xmm0, xmm7
  325. movdqa xmm1, xmm4
  326. psrld xmm0, 31
  327. psrld xmm1, 31
  328. pslld xmm7, 1
  329. pslld xmm4, 1
  330. movdqa xmm2, xmm0
  331. pslldq xmm0, 4
  332. psrldq xmm2, 12
  333. pslldq xmm1, 4
  334. por xmm4, xmm2
  335. por xmm7, xmm0
  336. por xmm4, xmm1
  337. movdqa xmm0, xmm7
  338. movdqa xmm1, xmm7
  339. movdqa xmm2, xmm7
  340. pslld xmm0, 31
  341. pslld xmm1, 30
  342. pslld xmm2, 25
  343. pxor xmm0, xmm1
  344. pxor xmm0, xmm2
  345. movdqa xmm1, xmm0
  346. psrldq xmm1, 4
  347. pslldq xmm0, 12
  348. pxor xmm7, xmm0
  349. movdqa xmm2, xmm7
  350. movdqa xmm3, xmm7
  351. movdqa xmm0, xmm7
  352. psrld xmm2, 1
  353. psrld xmm3, 2
  354. psrld xmm0, 7
  355. pxor xmm2, xmm3
  356. pxor xmm2, xmm0
  357. pxor xmm2, xmm1
  358. pxor xmm2, xmm7
  359. pxor xmm4, xmm2
  360. L_AES_GCM_encrypt_aesni_calc_iv_done:
  361. ; T = Encrypt counter
  362. pxor xmm0, xmm0
  363. shl edx, 3
  364. pinsrq xmm0, rdx, 0
  365. pxor xmm4, xmm0
  366. pshufd xmm1, xmm4, 78
  367. pshufd xmm2, xmm5, 78
  368. movdqa xmm3, xmm5
  369. movdqa xmm0, xmm5
  370. pclmulqdq xmm3, xmm4, 17
  371. pclmulqdq xmm0, xmm4, 0
  372. pxor xmm1, xmm4
  373. pxor xmm2, xmm5
  374. pclmulqdq xmm1, xmm2, 0
  375. pxor xmm1, xmm0
  376. pxor xmm1, xmm3
  377. movdqa xmm2, xmm1
  378. movdqa xmm7, xmm0
  379. movdqa xmm4, xmm3
  380. pslldq xmm2, 8
  381. psrldq xmm1, 8
  382. pxor xmm7, xmm2
  383. pxor xmm4, xmm1
  384. movdqa xmm0, xmm7
  385. movdqa xmm1, xmm4
  386. psrld xmm0, 31
  387. psrld xmm1, 31
  388. pslld xmm7, 1
  389. pslld xmm4, 1
  390. movdqa xmm2, xmm0
  391. pslldq xmm0, 4
  392. psrldq xmm2, 12
  393. pslldq xmm1, 4
  394. por xmm4, xmm2
  395. por xmm7, xmm0
  396. por xmm4, xmm1
  397. movdqa xmm0, xmm7
  398. movdqa xmm1, xmm7
  399. movdqa xmm2, xmm7
  400. pslld xmm0, 31
  401. pslld xmm1, 30
  402. pslld xmm2, 25
  403. pxor xmm0, xmm1
  404. pxor xmm0, xmm2
  405. movdqa xmm1, xmm0
  406. psrldq xmm1, 4
  407. pslldq xmm0, 12
  408. pxor xmm7, xmm0
  409. movdqa xmm2, xmm7
  410. movdqa xmm3, xmm7
  411. movdqa xmm0, xmm7
  412. psrld xmm2, 1
  413. psrld xmm3, 2
  414. psrld xmm0, 7
  415. pxor xmm2, xmm3
  416. pxor xmm2, xmm0
  417. pxor xmm2, xmm1
  418. pxor xmm2, xmm7
  419. pxor xmm4, xmm2
  420. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  421. ; Encrypt counter
  422. movdqa xmm8, OWORD PTR [r15]
  423. pxor xmm8, xmm4
  424. aesenc xmm8, [r15+16]
  425. aesenc xmm8, [r15+32]
  426. aesenc xmm8, [r15+48]
  427. aesenc xmm8, [r15+64]
  428. aesenc xmm8, [r15+80]
  429. aesenc xmm8, [r15+96]
  430. aesenc xmm8, [r15+112]
  431. aesenc xmm8, [r15+128]
  432. aesenc xmm8, [r15+144]
  433. cmp r10d, 11
  434. movdqa xmm9, OWORD PTR [r15+160]
  435. jl L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last
  436. aesenc xmm8, xmm9
  437. aesenc xmm8, [r15+176]
  438. cmp r10d, 13
  439. movdqa xmm9, OWORD PTR [r15+192]
  440. jl L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last
  441. aesenc xmm8, xmm9
  442. aesenc xmm8, [r15+208]
  443. movdqa xmm9, OWORD PTR [r15+224]
  444. L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last:
  445. aesenclast xmm8, xmm9
  446. movdqu [rsp+144], xmm8
  447. L_AES_GCM_encrypt_aesni_iv_done:
  448. ; Additional authentication data
  449. mov edx, r11d
  450. cmp edx, 0
  451. je L_AES_GCM_encrypt_aesni_calc_aad_done
  452. xor ecx, ecx
  453. cmp edx, 16
  454. jl L_AES_GCM_encrypt_aesni_calc_aad_lt16
  455. and edx, 4294967280
  456. L_AES_GCM_encrypt_aesni_calc_aad_16_loop:
  457. movdqu xmm8, [r12+rcx]
  458. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  459. pxor xmm6, xmm8
  460. pshufd xmm1, xmm6, 78
  461. pshufd xmm2, xmm5, 78
  462. movdqa xmm3, xmm5
  463. movdqa xmm0, xmm5
  464. pclmulqdq xmm3, xmm6, 17
  465. pclmulqdq xmm0, xmm6, 0
  466. pxor xmm1, xmm6
  467. pxor xmm2, xmm5
  468. pclmulqdq xmm1, xmm2, 0
  469. pxor xmm1, xmm0
  470. pxor xmm1, xmm3
  471. movdqa xmm2, xmm1
  472. movdqa xmm7, xmm0
  473. movdqa xmm6, xmm3
  474. pslldq xmm2, 8
  475. psrldq xmm1, 8
  476. pxor xmm7, xmm2
  477. pxor xmm6, xmm1
  478. movdqa xmm0, xmm7
  479. movdqa xmm1, xmm6
  480. psrld xmm0, 31
  481. psrld xmm1, 31
  482. pslld xmm7, 1
  483. pslld xmm6, 1
  484. movdqa xmm2, xmm0
  485. pslldq xmm0, 4
  486. psrldq xmm2, 12
  487. pslldq xmm1, 4
  488. por xmm6, xmm2
  489. por xmm7, xmm0
  490. por xmm6, xmm1
  491. movdqa xmm0, xmm7
  492. movdqa xmm1, xmm7
  493. movdqa xmm2, xmm7
  494. pslld xmm0, 31
  495. pslld xmm1, 30
  496. pslld xmm2, 25
  497. pxor xmm0, xmm1
  498. pxor xmm0, xmm2
  499. movdqa xmm1, xmm0
  500. psrldq xmm1, 4
  501. pslldq xmm0, 12
  502. pxor xmm7, xmm0
  503. movdqa xmm2, xmm7
  504. movdqa xmm3, xmm7
  505. movdqa xmm0, xmm7
  506. psrld xmm2, 1
  507. psrld xmm3, 2
  508. psrld xmm0, 7
  509. pxor xmm2, xmm3
  510. pxor xmm2, xmm0
  511. pxor xmm2, xmm1
  512. pxor xmm2, xmm7
  513. pxor xmm6, xmm2
  514. add ecx, 16
  515. cmp ecx, edx
  516. jl L_AES_GCM_encrypt_aesni_calc_aad_16_loop
  517. mov edx, r11d
  518. cmp ecx, edx
  519. je L_AES_GCM_encrypt_aesni_calc_aad_done
  520. L_AES_GCM_encrypt_aesni_calc_aad_lt16:
  521. sub rsp, 16
  522. pxor xmm8, xmm8
  523. xor ebx, ebx
  524. movdqu [rsp], xmm8
  525. L_AES_GCM_encrypt_aesni_calc_aad_loop:
  526. movzx r13d, BYTE PTR [r12+rcx]
  527. mov BYTE PTR [rsp+rbx], r13b
  528. inc ecx
  529. inc ebx
  530. cmp ecx, edx
  531. jl L_AES_GCM_encrypt_aesni_calc_aad_loop
  532. movdqu xmm8, [rsp]
  533. add rsp, 16
  534. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  535. pxor xmm6, xmm8
  536. pshufd xmm1, xmm6, 78
  537. pshufd xmm2, xmm5, 78
  538. movdqa xmm3, xmm5
  539. movdqa xmm0, xmm5
  540. pclmulqdq xmm3, xmm6, 17
  541. pclmulqdq xmm0, xmm6, 0
  542. pxor xmm1, xmm6
  543. pxor xmm2, xmm5
  544. pclmulqdq xmm1, xmm2, 0
  545. pxor xmm1, xmm0
  546. pxor xmm1, xmm3
  547. movdqa xmm2, xmm1
  548. movdqa xmm7, xmm0
  549. movdqa xmm6, xmm3
  550. pslldq xmm2, 8
  551. psrldq xmm1, 8
  552. pxor xmm7, xmm2
  553. pxor xmm6, xmm1
  554. movdqa xmm0, xmm7
  555. movdqa xmm1, xmm6
  556. psrld xmm0, 31
  557. psrld xmm1, 31
  558. pslld xmm7, 1
  559. pslld xmm6, 1
  560. movdqa xmm2, xmm0
  561. pslldq xmm0, 4
  562. psrldq xmm2, 12
  563. pslldq xmm1, 4
  564. por xmm6, xmm2
  565. por xmm7, xmm0
  566. por xmm6, xmm1
  567. movdqa xmm0, xmm7
  568. movdqa xmm1, xmm7
  569. movdqa xmm2, xmm7
  570. pslld xmm0, 31
  571. pslld xmm1, 30
  572. pslld xmm2, 25
  573. pxor xmm0, xmm1
  574. pxor xmm0, xmm2
  575. movdqa xmm1, xmm0
  576. psrldq xmm1, 4
  577. pslldq xmm0, 12
  578. pxor xmm7, xmm0
  579. movdqa xmm2, xmm7
  580. movdqa xmm3, xmm7
  581. movdqa xmm0, xmm7
  582. psrld xmm2, 1
  583. psrld xmm3, 2
  584. psrld xmm0, 7
  585. pxor xmm2, xmm3
  586. pxor xmm2, xmm0
  587. pxor xmm2, xmm1
  588. pxor xmm2, xmm7
  589. pxor xmm6, xmm2
  590. L_AES_GCM_encrypt_aesni_calc_aad_done:
  591. ; Calculate counter and H
  592. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  593. movdqa xmm9, xmm5
  594. paddd xmm4, OWORD PTR L_aes_gcm_one
  595. movdqa xmm8, xmm5
  596. movdqu [rsp+128], xmm4
  597. psrlq xmm9, 63
  598. psllq xmm8, 1
  599. pslldq xmm9, 8
  600. por xmm8, xmm9
  601. pshufd xmm5, xmm5, 255
  602. psrad xmm5, 31
  603. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  604. pxor xmm5, xmm8
  605. xor rbx, rbx
  606. cmp r9d, 128
  607. mov r13d, r9d
  608. jl L_AES_GCM_encrypt_aesni_done_128
  609. and r13d, 4294967168
  610. movdqa xmm2, xmm6
  611. ; H ^ 1
  612. movdqu [rsp], xmm5
  613. ; H ^ 2
  614. pshufd xmm9, xmm5, 78
  615. pshufd xmm10, xmm5, 78
  616. movdqa xmm11, xmm5
  617. movdqa xmm8, xmm5
  618. pclmulqdq xmm11, xmm5, 17
  619. pclmulqdq xmm8, xmm5, 0
  620. pxor xmm9, xmm5
  621. pxor xmm10, xmm5
  622. pclmulqdq xmm9, xmm10, 0
  623. pxor xmm9, xmm8
  624. pxor xmm9, xmm11
  625. movdqa xmm10, xmm9
  626. movdqa xmm0, xmm11
  627. pslldq xmm10, 8
  628. psrldq xmm9, 8
  629. pxor xmm8, xmm10
  630. pxor xmm0, xmm9
  631. movdqa xmm12, xmm8
  632. movdqa xmm13, xmm8
  633. movdqa xmm14, xmm8
  634. pslld xmm12, 31
  635. pslld xmm13, 30
  636. pslld xmm14, 25
  637. pxor xmm12, xmm13
  638. pxor xmm12, xmm14
  639. movdqa xmm13, xmm12
  640. psrldq xmm13, 4
  641. pslldq xmm12, 12
  642. pxor xmm8, xmm12
  643. movdqa xmm14, xmm8
  644. movdqa xmm10, xmm8
  645. movdqa xmm9, xmm8
  646. psrld xmm14, 1
  647. psrld xmm10, 2
  648. psrld xmm9, 7
  649. pxor xmm14, xmm10
  650. pxor xmm14, xmm9
  651. pxor xmm14, xmm13
  652. pxor xmm14, xmm8
  653. pxor xmm0, xmm14
  654. movdqu [rsp+16], xmm0
  655. ; H ^ 3
  656. pshufd xmm9, xmm5, 78
  657. pshufd xmm10, xmm0, 78
  658. movdqa xmm11, xmm0
  659. movdqa xmm8, xmm0
  660. pclmulqdq xmm11, xmm5, 17
  661. pclmulqdq xmm8, xmm5, 0
  662. pxor xmm9, xmm5
  663. pxor xmm10, xmm0
  664. pclmulqdq xmm9, xmm10, 0
  665. pxor xmm9, xmm8
  666. pxor xmm9, xmm11
  667. movdqa xmm10, xmm9
  668. movdqa xmm1, xmm11
  669. pslldq xmm10, 8
  670. psrldq xmm9, 8
  671. pxor xmm8, xmm10
  672. pxor xmm1, xmm9
  673. movdqa xmm12, xmm8
  674. movdqa xmm13, xmm8
  675. movdqa xmm14, xmm8
  676. pslld xmm12, 31
  677. pslld xmm13, 30
  678. pslld xmm14, 25
  679. pxor xmm12, xmm13
  680. pxor xmm12, xmm14
  681. movdqa xmm13, xmm12
  682. psrldq xmm13, 4
  683. pslldq xmm12, 12
  684. pxor xmm8, xmm12
  685. movdqa xmm14, xmm8
  686. movdqa xmm10, xmm8
  687. movdqa xmm9, xmm8
  688. psrld xmm14, 1
  689. psrld xmm10, 2
  690. psrld xmm9, 7
  691. pxor xmm14, xmm10
  692. pxor xmm14, xmm9
  693. pxor xmm14, xmm13
  694. pxor xmm14, xmm8
  695. pxor xmm1, xmm14
  696. movdqu [rsp+32], xmm1
  697. ; H ^ 4
  698. pshufd xmm9, xmm0, 78
  699. pshufd xmm10, xmm0, 78
  700. movdqa xmm11, xmm0
  701. movdqa xmm8, xmm0
  702. pclmulqdq xmm11, xmm0, 17
  703. pclmulqdq xmm8, xmm0, 0
  704. pxor xmm9, xmm0
  705. pxor xmm10, xmm0
  706. pclmulqdq xmm9, xmm10, 0
  707. pxor xmm9, xmm8
  708. pxor xmm9, xmm11
  709. movdqa xmm10, xmm9
  710. movdqa xmm3, xmm11
  711. pslldq xmm10, 8
  712. psrldq xmm9, 8
  713. pxor xmm8, xmm10
  714. pxor xmm3, xmm9
  715. movdqa xmm12, xmm8
  716. movdqa xmm13, xmm8
  717. movdqa xmm14, xmm8
  718. pslld xmm12, 31
  719. pslld xmm13, 30
  720. pslld xmm14, 25
  721. pxor xmm12, xmm13
  722. pxor xmm12, xmm14
  723. movdqa xmm13, xmm12
  724. psrldq xmm13, 4
  725. pslldq xmm12, 12
  726. pxor xmm8, xmm12
  727. movdqa xmm14, xmm8
  728. movdqa xmm10, xmm8
  729. movdqa xmm9, xmm8
  730. psrld xmm14, 1
  731. psrld xmm10, 2
  732. psrld xmm9, 7
  733. pxor xmm14, xmm10
  734. pxor xmm14, xmm9
  735. pxor xmm14, xmm13
  736. pxor xmm14, xmm8
  737. pxor xmm3, xmm14
  738. movdqu [rsp+48], xmm3
  739. ; H ^ 5
  740. pshufd xmm9, xmm0, 78
  741. pshufd xmm10, xmm1, 78
  742. movdqa xmm11, xmm1
  743. movdqa xmm8, xmm1
  744. pclmulqdq xmm11, xmm0, 17
  745. pclmulqdq xmm8, xmm0, 0
  746. pxor xmm9, xmm0
  747. pxor xmm10, xmm1
  748. pclmulqdq xmm9, xmm10, 0
  749. pxor xmm9, xmm8
  750. pxor xmm9, xmm11
  751. movdqa xmm10, xmm9
  752. movdqa xmm7, xmm11
  753. pslldq xmm10, 8
  754. psrldq xmm9, 8
  755. pxor xmm8, xmm10
  756. pxor xmm7, xmm9
  757. movdqa xmm12, xmm8
  758. movdqa xmm13, xmm8
  759. movdqa xmm14, xmm8
  760. pslld xmm12, 31
  761. pslld xmm13, 30
  762. pslld xmm14, 25
  763. pxor xmm12, xmm13
  764. pxor xmm12, xmm14
  765. movdqa xmm13, xmm12
  766. psrldq xmm13, 4
  767. pslldq xmm12, 12
  768. pxor xmm8, xmm12
  769. movdqa xmm14, xmm8
  770. movdqa xmm10, xmm8
  771. movdqa xmm9, xmm8
  772. psrld xmm14, 1
  773. psrld xmm10, 2
  774. psrld xmm9, 7
  775. pxor xmm14, xmm10
  776. pxor xmm14, xmm9
  777. pxor xmm14, xmm13
  778. pxor xmm14, xmm8
  779. pxor xmm7, xmm14
  780. movdqu [rsp+64], xmm7
  781. ; H ^ 6
  782. pshufd xmm9, xmm1, 78
  783. pshufd xmm10, xmm1, 78
  784. movdqa xmm11, xmm1
  785. movdqa xmm8, xmm1
  786. pclmulqdq xmm11, xmm1, 17
  787. pclmulqdq xmm8, xmm1, 0
  788. pxor xmm9, xmm1
  789. pxor xmm10, xmm1
  790. pclmulqdq xmm9, xmm10, 0
  791. pxor xmm9, xmm8
  792. pxor xmm9, xmm11
  793. movdqa xmm10, xmm9
  794. movdqa xmm7, xmm11
  795. pslldq xmm10, 8
  796. psrldq xmm9, 8
  797. pxor xmm8, xmm10
  798. pxor xmm7, xmm9
  799. movdqa xmm12, xmm8
  800. movdqa xmm13, xmm8
  801. movdqa xmm14, xmm8
  802. pslld xmm12, 31
  803. pslld xmm13, 30
  804. pslld xmm14, 25
  805. pxor xmm12, xmm13
  806. pxor xmm12, xmm14
  807. movdqa xmm13, xmm12
  808. psrldq xmm13, 4
  809. pslldq xmm12, 12
  810. pxor xmm8, xmm12
  811. movdqa xmm14, xmm8
  812. movdqa xmm10, xmm8
  813. movdqa xmm9, xmm8
  814. psrld xmm14, 1
  815. psrld xmm10, 2
  816. psrld xmm9, 7
  817. pxor xmm14, xmm10
  818. pxor xmm14, xmm9
  819. pxor xmm14, xmm13
  820. pxor xmm14, xmm8
  821. pxor xmm7, xmm14
  822. movdqu [rsp+80], xmm7
  823. ; H ^ 7
  824. pshufd xmm9, xmm1, 78
  825. pshufd xmm10, xmm3, 78
  826. movdqa xmm11, xmm3
  827. movdqa xmm8, xmm3
  828. pclmulqdq xmm11, xmm1, 17
  829. pclmulqdq xmm8, xmm1, 0
  830. pxor xmm9, xmm1
  831. pxor xmm10, xmm3
  832. pclmulqdq xmm9, xmm10, 0
  833. pxor xmm9, xmm8
  834. pxor xmm9, xmm11
  835. movdqa xmm10, xmm9
  836. movdqa xmm7, xmm11
  837. pslldq xmm10, 8
  838. psrldq xmm9, 8
  839. pxor xmm8, xmm10
  840. pxor xmm7, xmm9
  841. movdqa xmm12, xmm8
  842. movdqa xmm13, xmm8
  843. movdqa xmm14, xmm8
  844. pslld xmm12, 31
  845. pslld xmm13, 30
  846. pslld xmm14, 25
  847. pxor xmm12, xmm13
  848. pxor xmm12, xmm14
  849. movdqa xmm13, xmm12
  850. psrldq xmm13, 4
  851. pslldq xmm12, 12
  852. pxor xmm8, xmm12
  853. movdqa xmm14, xmm8
  854. movdqa xmm10, xmm8
  855. movdqa xmm9, xmm8
  856. psrld xmm14, 1
  857. psrld xmm10, 2
  858. psrld xmm9, 7
  859. pxor xmm14, xmm10
  860. pxor xmm14, xmm9
  861. pxor xmm14, xmm13
  862. pxor xmm14, xmm8
  863. pxor xmm7, xmm14
  864. movdqu [rsp+96], xmm7
  865. ; H ^ 8
  866. pshufd xmm9, xmm3, 78
  867. pshufd xmm10, xmm3, 78
  868. movdqa xmm11, xmm3
  869. movdqa xmm8, xmm3
  870. pclmulqdq xmm11, xmm3, 17
  871. pclmulqdq xmm8, xmm3, 0
  872. pxor xmm9, xmm3
  873. pxor xmm10, xmm3
  874. pclmulqdq xmm9, xmm10, 0
  875. pxor xmm9, xmm8
  876. pxor xmm9, xmm11
  877. movdqa xmm10, xmm9
  878. movdqa xmm7, xmm11
  879. pslldq xmm10, 8
  880. psrldq xmm9, 8
  881. pxor xmm8, xmm10
  882. pxor xmm7, xmm9
  883. movdqa xmm12, xmm8
  884. movdqa xmm13, xmm8
  885. movdqa xmm14, xmm8
  886. pslld xmm12, 31
  887. pslld xmm13, 30
  888. pslld xmm14, 25
  889. pxor xmm12, xmm13
  890. pxor xmm12, xmm14
  891. movdqa xmm13, xmm12
  892. psrldq xmm13, 4
  893. pslldq xmm12, 12
  894. pxor xmm8, xmm12
  895. movdqa xmm14, xmm8
  896. movdqa xmm10, xmm8
  897. movdqa xmm9, xmm8
  898. psrld xmm14, 1
  899. psrld xmm10, 2
  900. psrld xmm9, 7
  901. pxor xmm14, xmm10
  902. pxor xmm14, xmm9
  903. pxor xmm14, xmm13
  904. pxor xmm14, xmm8
  905. pxor xmm7, xmm14
  906. movdqu [rsp+112], xmm7
  907. ; First 128 bytes of input
  908. movdqu xmm8, [rsp+128]
  909. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  910. movdqa xmm0, xmm8
  911. pshufb xmm8, xmm1
  912. movdqa xmm9, xmm0
  913. paddd xmm9, OWORD PTR L_aes_gcm_one
  914. pshufb xmm9, xmm1
  915. movdqa xmm10, xmm0
  916. paddd xmm10, OWORD PTR L_aes_gcm_two
  917. pshufb xmm10, xmm1
  918. movdqa xmm11, xmm0
  919. paddd xmm11, OWORD PTR L_aes_gcm_three
  920. pshufb xmm11, xmm1
  921. movdqa xmm12, xmm0
  922. paddd xmm12, OWORD PTR L_aes_gcm_four
  923. pshufb xmm12, xmm1
  924. movdqa xmm13, xmm0
  925. paddd xmm13, OWORD PTR L_aes_gcm_five
  926. pshufb xmm13, xmm1
  927. movdqa xmm14, xmm0
  928. paddd xmm14, OWORD PTR L_aes_gcm_six
  929. pshufb xmm14, xmm1
  930. movdqa xmm15, xmm0
  931. paddd xmm15, OWORD PTR L_aes_gcm_seven
  932. pshufb xmm15, xmm1
  933. paddd xmm0, OWORD PTR L_aes_gcm_eight
  934. movdqa xmm7, OWORD PTR [r15]
  935. movdqu [rsp+128], xmm0
  936. pxor xmm8, xmm7
  937. pxor xmm9, xmm7
  938. pxor xmm10, xmm7
  939. pxor xmm11, xmm7
  940. pxor xmm12, xmm7
  941. pxor xmm13, xmm7
  942. pxor xmm14, xmm7
  943. pxor xmm15, xmm7
  944. movdqa xmm7, OWORD PTR [r15+16]
  945. aesenc xmm8, xmm7
  946. aesenc xmm9, xmm7
  947. aesenc xmm10, xmm7
  948. aesenc xmm11, xmm7
  949. aesenc xmm12, xmm7
  950. aesenc xmm13, xmm7
  951. aesenc xmm14, xmm7
  952. aesenc xmm15, xmm7
  953. movdqa xmm7, OWORD PTR [r15+32]
  954. aesenc xmm8, xmm7
  955. aesenc xmm9, xmm7
  956. aesenc xmm10, xmm7
  957. aesenc xmm11, xmm7
  958. aesenc xmm12, xmm7
  959. aesenc xmm13, xmm7
  960. aesenc xmm14, xmm7
  961. aesenc xmm15, xmm7
  962. movdqa xmm7, OWORD PTR [r15+48]
  963. aesenc xmm8, xmm7
  964. aesenc xmm9, xmm7
  965. aesenc xmm10, xmm7
  966. aesenc xmm11, xmm7
  967. aesenc xmm12, xmm7
  968. aesenc xmm13, xmm7
  969. aesenc xmm14, xmm7
  970. aesenc xmm15, xmm7
  971. movdqa xmm7, OWORD PTR [r15+64]
  972. aesenc xmm8, xmm7
  973. aesenc xmm9, xmm7
  974. aesenc xmm10, xmm7
  975. aesenc xmm11, xmm7
  976. aesenc xmm12, xmm7
  977. aesenc xmm13, xmm7
  978. aesenc xmm14, xmm7
  979. aesenc xmm15, xmm7
  980. movdqa xmm7, OWORD PTR [r15+80]
  981. aesenc xmm8, xmm7
  982. aesenc xmm9, xmm7
  983. aesenc xmm10, xmm7
  984. aesenc xmm11, xmm7
  985. aesenc xmm12, xmm7
  986. aesenc xmm13, xmm7
  987. aesenc xmm14, xmm7
  988. aesenc xmm15, xmm7
  989. movdqa xmm7, OWORD PTR [r15+96]
  990. aesenc xmm8, xmm7
  991. aesenc xmm9, xmm7
  992. aesenc xmm10, xmm7
  993. aesenc xmm11, xmm7
  994. aesenc xmm12, xmm7
  995. aesenc xmm13, xmm7
  996. aesenc xmm14, xmm7
  997. aesenc xmm15, xmm7
  998. movdqa xmm7, OWORD PTR [r15+112]
  999. aesenc xmm8, xmm7
  1000. aesenc xmm9, xmm7
  1001. aesenc xmm10, xmm7
  1002. aesenc xmm11, xmm7
  1003. aesenc xmm12, xmm7
  1004. aesenc xmm13, xmm7
  1005. aesenc xmm14, xmm7
  1006. aesenc xmm15, xmm7
  1007. movdqa xmm7, OWORD PTR [r15+128]
  1008. aesenc xmm8, xmm7
  1009. aesenc xmm9, xmm7
  1010. aesenc xmm10, xmm7
  1011. aesenc xmm11, xmm7
  1012. aesenc xmm12, xmm7
  1013. aesenc xmm13, xmm7
  1014. aesenc xmm14, xmm7
  1015. aesenc xmm15, xmm7
  1016. movdqa xmm7, OWORD PTR [r15+144]
  1017. aesenc xmm8, xmm7
  1018. aesenc xmm9, xmm7
  1019. aesenc xmm10, xmm7
  1020. aesenc xmm11, xmm7
  1021. aesenc xmm12, xmm7
  1022. aesenc xmm13, xmm7
  1023. aesenc xmm14, xmm7
  1024. aesenc xmm15, xmm7
  1025. cmp r10d, 11
  1026. movdqa xmm7, OWORD PTR [r15+160]
  1027. jl L_AES_GCM_encrypt_aesni_enc_done
  1028. aesenc xmm8, xmm7
  1029. aesenc xmm9, xmm7
  1030. aesenc xmm10, xmm7
  1031. aesenc xmm11, xmm7
  1032. aesenc xmm12, xmm7
  1033. aesenc xmm13, xmm7
  1034. aesenc xmm14, xmm7
  1035. aesenc xmm15, xmm7
  1036. movdqa xmm7, OWORD PTR [r15+176]
  1037. aesenc xmm8, xmm7
  1038. aesenc xmm9, xmm7
  1039. aesenc xmm10, xmm7
  1040. aesenc xmm11, xmm7
  1041. aesenc xmm12, xmm7
  1042. aesenc xmm13, xmm7
  1043. aesenc xmm14, xmm7
  1044. aesenc xmm15, xmm7
  1045. cmp r10d, 13
  1046. movdqa xmm7, OWORD PTR [r15+192]
  1047. jl L_AES_GCM_encrypt_aesni_enc_done
  1048. aesenc xmm8, xmm7
  1049. aesenc xmm9, xmm7
  1050. aesenc xmm10, xmm7
  1051. aesenc xmm11, xmm7
  1052. aesenc xmm12, xmm7
  1053. aesenc xmm13, xmm7
  1054. aesenc xmm14, xmm7
  1055. aesenc xmm15, xmm7
  1056. movdqa xmm7, OWORD PTR [r15+208]
  1057. aesenc xmm8, xmm7
  1058. aesenc xmm9, xmm7
  1059. aesenc xmm10, xmm7
  1060. aesenc xmm11, xmm7
  1061. aesenc xmm12, xmm7
  1062. aesenc xmm13, xmm7
  1063. aesenc xmm14, xmm7
  1064. aesenc xmm15, xmm7
  1065. movdqa xmm7, OWORD PTR [r15+224]
  1066. L_AES_GCM_encrypt_aesni_enc_done:
  1067. aesenclast xmm8, xmm7
  1068. aesenclast xmm9, xmm7
  1069. movdqu xmm0, [rdi]
  1070. movdqu xmm1, [rdi+16]
  1071. pxor xmm8, xmm0
  1072. pxor xmm9, xmm1
  1073. movdqu [rsi], xmm8
  1074. movdqu [rsi+16], xmm9
  1075. aesenclast xmm10, xmm7
  1076. aesenclast xmm11, xmm7
  1077. movdqu xmm0, [rdi+32]
  1078. movdqu xmm1, [rdi+48]
  1079. pxor xmm10, xmm0
  1080. pxor xmm11, xmm1
  1081. movdqu [rsi+32], xmm10
  1082. movdqu [rsi+48], xmm11
  1083. aesenclast xmm12, xmm7
  1084. aesenclast xmm13, xmm7
  1085. movdqu xmm0, [rdi+64]
  1086. movdqu xmm1, [rdi+80]
  1087. pxor xmm12, xmm0
  1088. pxor xmm13, xmm1
  1089. movdqu [rsi+64], xmm12
  1090. movdqu [rsi+80], xmm13
  1091. aesenclast xmm14, xmm7
  1092. aesenclast xmm15, xmm7
  1093. movdqu xmm0, [rdi+96]
  1094. movdqu xmm1, [rdi+112]
  1095. pxor xmm14, xmm0
  1096. pxor xmm15, xmm1
  1097. movdqu [rsi+96], xmm14
  1098. movdqu [rsi+112], xmm15
  1099. cmp r13d, 128
  1100. mov ebx, 128
  1101. jle L_AES_GCM_encrypt_aesni_end_128
  1102. ; Process each further 128-byte chunk of input
  1103. L_AES_GCM_encrypt_aesni_ghash_128:
  1104. lea rcx, QWORD PTR [rdi+rbx]
  1105. lea rdx, QWORD PTR [rsi+rbx]
  1106. movdqu xmm8, [rsp+128]
  1107. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  1108. movdqa xmm0, xmm8
  1109. pshufb xmm8, xmm1
  1110. movdqa xmm9, xmm0
  1111. paddd xmm9, OWORD PTR L_aes_gcm_one
  1112. pshufb xmm9, xmm1
  1113. movdqa xmm10, xmm0
  1114. paddd xmm10, OWORD PTR L_aes_gcm_two
  1115. pshufb xmm10, xmm1
  1116. movdqa xmm11, xmm0
  1117. paddd xmm11, OWORD PTR L_aes_gcm_three
  1118. pshufb xmm11, xmm1
  1119. movdqa xmm12, xmm0
  1120. paddd xmm12, OWORD PTR L_aes_gcm_four
  1121. pshufb xmm12, xmm1
  1122. movdqa xmm13, xmm0
  1123. paddd xmm13, OWORD PTR L_aes_gcm_five
  1124. pshufb xmm13, xmm1
  1125. movdqa xmm14, xmm0
  1126. paddd xmm14, OWORD PTR L_aes_gcm_six
  1127. pshufb xmm14, xmm1
  1128. movdqa xmm15, xmm0
  1129. paddd xmm15, OWORD PTR L_aes_gcm_seven
  1130. pshufb xmm15, xmm1
  1131. paddd xmm0, OWORD PTR L_aes_gcm_eight
  1132. movdqa xmm7, OWORD PTR [r15]
  1133. movdqu [rsp+128], xmm0
  1134. pxor xmm8, xmm7
  1135. pxor xmm9, xmm7
  1136. pxor xmm10, xmm7
  1137. pxor xmm11, xmm7
  1138. pxor xmm12, xmm7
  1139. pxor xmm13, xmm7
  1140. pxor xmm14, xmm7
  1141. pxor xmm15, xmm7
  1142. movdqu xmm7, [rsp+112]
  1143. movdqu xmm0, [rdx+-128]
  1144. aesenc xmm8, [r15+16]
  1145. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1146. pxor xmm0, xmm2
  1147. pshufd xmm1, xmm7, 78
  1148. pshufd xmm5, xmm0, 78
  1149. pxor xmm1, xmm7
  1150. pxor xmm5, xmm0
  1151. movdqa xmm3, xmm0
  1152. pclmulqdq xmm3, xmm7, 17
  1153. aesenc xmm9, [r15+16]
  1154. aesenc xmm10, [r15+16]
  1155. movdqa xmm2, xmm0
  1156. pclmulqdq xmm2, xmm7, 0
  1157. aesenc xmm11, [r15+16]
  1158. aesenc xmm12, [r15+16]
  1159. pclmulqdq xmm1, xmm5, 0
  1160. aesenc xmm13, [r15+16]
  1161. aesenc xmm14, [r15+16]
  1162. aesenc xmm15, [r15+16]
  1163. pxor xmm1, xmm2
  1164. pxor xmm1, xmm3
  1165. movdqu xmm7, [rsp+96]
  1166. movdqu xmm0, [rdx+-112]
  1167. pshufd xmm4, xmm7, 78
  1168. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1169. aesenc xmm8, [r15+32]
  1170. pxor xmm4, xmm7
  1171. pshufd xmm5, xmm0, 78
  1172. pxor xmm5, xmm0
  1173. movdqa xmm6, xmm0
  1174. pclmulqdq xmm6, xmm7, 17
  1175. aesenc xmm9, [r15+32]
  1176. aesenc xmm10, [r15+32]
  1177. pclmulqdq xmm7, xmm0, 0
  1178. aesenc xmm11, [r15+32]
  1179. aesenc xmm12, [r15+32]
  1180. pclmulqdq xmm4, xmm5, 0
  1181. aesenc xmm13, [r15+32]
  1182. aesenc xmm14, [r15+32]
  1183. aesenc xmm15, [r15+32]
  1184. pxor xmm1, xmm7
  1185. pxor xmm2, xmm7
  1186. pxor xmm1, xmm6
  1187. pxor xmm3, xmm6
  1188. pxor xmm1, xmm4
  1189. movdqu xmm7, [rsp+80]
  1190. movdqu xmm0, [rdx+-96]
  1191. pshufd xmm4, xmm7, 78
  1192. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1193. aesenc xmm8, [r15+48]
  1194. pxor xmm4, xmm7
  1195. pshufd xmm5, xmm0, 78
  1196. pxor xmm5, xmm0
  1197. movdqa xmm6, xmm0
  1198. pclmulqdq xmm6, xmm7, 17
  1199. aesenc xmm9, [r15+48]
  1200. aesenc xmm10, [r15+48]
  1201. pclmulqdq xmm7, xmm0, 0
  1202. aesenc xmm11, [r15+48]
  1203. aesenc xmm12, [r15+48]
  1204. pclmulqdq xmm4, xmm5, 0
  1205. aesenc xmm13, [r15+48]
  1206. aesenc xmm14, [r15+48]
  1207. aesenc xmm15, [r15+48]
  1208. pxor xmm1, xmm7
  1209. pxor xmm2, xmm7
  1210. pxor xmm1, xmm6
  1211. pxor xmm3, xmm6
  1212. pxor xmm1, xmm4
  1213. movdqu xmm7, [rsp+64]
  1214. movdqu xmm0, [rdx+-80]
  1215. pshufd xmm4, xmm7, 78
  1216. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1217. aesenc xmm8, [r15+64]
  1218. pxor xmm4, xmm7
  1219. pshufd xmm5, xmm0, 78
  1220. pxor xmm5, xmm0
  1221. movdqa xmm6, xmm0
  1222. pclmulqdq xmm6, xmm7, 17
  1223. aesenc xmm9, [r15+64]
  1224. aesenc xmm10, [r15+64]
  1225. pclmulqdq xmm7, xmm0, 0
  1226. aesenc xmm11, [r15+64]
  1227. aesenc xmm12, [r15+64]
  1228. pclmulqdq xmm4, xmm5, 0
  1229. aesenc xmm13, [r15+64]
  1230. aesenc xmm14, [r15+64]
  1231. aesenc xmm15, [r15+64]
  1232. pxor xmm1, xmm7
  1233. pxor xmm2, xmm7
  1234. pxor xmm1, xmm6
  1235. pxor xmm3, xmm6
  1236. pxor xmm1, xmm4
  1237. movdqu xmm7, [rsp+48]
  1238. movdqu xmm0, [rdx+-64]
  1239. pshufd xmm4, xmm7, 78
  1240. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1241. aesenc xmm8, [r15+80]
  1242. pxor xmm4, xmm7
  1243. pshufd xmm5, xmm0, 78
  1244. pxor xmm5, xmm0
  1245. movdqa xmm6, xmm0
  1246. pclmulqdq xmm6, xmm7, 17
  1247. aesenc xmm9, [r15+80]
  1248. aesenc xmm10, [r15+80]
  1249. pclmulqdq xmm7, xmm0, 0
  1250. aesenc xmm11, [r15+80]
  1251. aesenc xmm12, [r15+80]
  1252. pclmulqdq xmm4, xmm5, 0
  1253. aesenc xmm13, [r15+80]
  1254. aesenc xmm14, [r15+80]
  1255. aesenc xmm15, [r15+80]
  1256. pxor xmm1, xmm7
  1257. pxor xmm2, xmm7
  1258. pxor xmm1, xmm6
  1259. pxor xmm3, xmm6
  1260. pxor xmm1, xmm4
  1261. movdqu xmm7, [rsp+32]
  1262. movdqu xmm0, [rdx+-48]
  1263. pshufd xmm4, xmm7, 78
  1264. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1265. aesenc xmm8, [r15+96]
  1266. pxor xmm4, xmm7
  1267. pshufd xmm5, xmm0, 78
  1268. pxor xmm5, xmm0
  1269. movdqa xmm6, xmm0
  1270. pclmulqdq xmm6, xmm7, 17
  1271. aesenc xmm9, [r15+96]
  1272. aesenc xmm10, [r15+96]
  1273. pclmulqdq xmm7, xmm0, 0
  1274. aesenc xmm11, [r15+96]
  1275. aesenc xmm12, [r15+96]
  1276. pclmulqdq xmm4, xmm5, 0
  1277. aesenc xmm13, [r15+96]
  1278. aesenc xmm14, [r15+96]
  1279. aesenc xmm15, [r15+96]
  1280. pxor xmm1, xmm7
  1281. pxor xmm2, xmm7
  1282. pxor xmm1, xmm6
  1283. pxor xmm3, xmm6
  1284. pxor xmm1, xmm4
  1285. movdqu xmm7, [rsp+16]
  1286. movdqu xmm0, [rdx+-32]
  1287. pshufd xmm4, xmm7, 78
  1288. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1289. aesenc xmm8, [r15+112]
  1290. pxor xmm4, xmm7
  1291. pshufd xmm5, xmm0, 78
  1292. pxor xmm5, xmm0
  1293. movdqa xmm6, xmm0
  1294. pclmulqdq xmm6, xmm7, 17
  1295. aesenc xmm9, [r15+112]
  1296. aesenc xmm10, [r15+112]
  1297. pclmulqdq xmm7, xmm0, 0
  1298. aesenc xmm11, [r15+112]
  1299. aesenc xmm12, [r15+112]
  1300. pclmulqdq xmm4, xmm5, 0
  1301. aesenc xmm13, [r15+112]
  1302. aesenc xmm14, [r15+112]
  1303. aesenc xmm15, [r15+112]
  1304. pxor xmm1, xmm7
  1305. pxor xmm2, xmm7
  1306. pxor xmm1, xmm6
  1307. pxor xmm3, xmm6
  1308. pxor xmm1, xmm4
  1309. movdqu xmm7, [rsp]
  1310. movdqu xmm0, [rdx+-16]
  1311. pshufd xmm4, xmm7, 78
  1312. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1313. aesenc xmm8, [r15+128]
  1314. pxor xmm4, xmm7
  1315. pshufd xmm5, xmm0, 78
  1316. pxor xmm5, xmm0
  1317. movdqa xmm6, xmm0
  1318. pclmulqdq xmm6, xmm7, 17
  1319. aesenc xmm9, [r15+128]
  1320. aesenc xmm10, [r15+128]
  1321. pclmulqdq xmm7, xmm0, 0
  1322. aesenc xmm11, [r15+128]
  1323. aesenc xmm12, [r15+128]
  1324. pclmulqdq xmm4, xmm5, 0
  1325. aesenc xmm13, [r15+128]
  1326. aesenc xmm14, [r15+128]
  1327. aesenc xmm15, [r15+128]
  1328. pxor xmm1, xmm7
  1329. pxor xmm2, xmm7
  1330. pxor xmm1, xmm6
  1331. pxor xmm3, xmm6
  1332. pxor xmm1, xmm4
  1333. movdqa xmm5, xmm1
  1334. psrldq xmm1, 8
  1335. pslldq xmm5, 8
  1336. aesenc xmm8, [r15+144]
  1337. pxor xmm2, xmm5
  1338. pxor xmm3, xmm1
  1339. movdqa xmm7, xmm2
  1340. movdqa xmm4, xmm2
  1341. movdqa xmm5, xmm2
  1342. aesenc xmm9, [r15+144]
  1343. pslld xmm7, 31
  1344. pslld xmm4, 30
  1345. pslld xmm5, 25
  1346. aesenc xmm10, [r15+144]
  1347. pxor xmm7, xmm4
  1348. pxor xmm7, xmm5
  1349. aesenc xmm11, [r15+144]
  1350. movdqa xmm4, xmm7
  1351. pslldq xmm7, 12
  1352. psrldq xmm4, 4
  1353. aesenc xmm12, [r15+144]
  1354. pxor xmm2, xmm7
  1355. movdqa xmm5, xmm2
  1356. movdqa xmm1, xmm2
  1357. movdqa xmm0, xmm2
  1358. aesenc xmm13, [r15+144]
  1359. psrld xmm5, 1
  1360. psrld xmm1, 2
  1361. psrld xmm0, 7
  1362. aesenc xmm14, [r15+144]
  1363. pxor xmm5, xmm1
  1364. pxor xmm5, xmm0
  1365. aesenc xmm15, [r15+144]
  1366. pxor xmm5, xmm4
  1367. pxor xmm2, xmm5
  1368. pxor xmm2, xmm3
  1369. cmp r10d, 11
  1370. movdqa xmm7, OWORD PTR [r15+160]
  1371. jl L_AES_GCM_encrypt_aesni_aesenc_128_ghash_avx_done
  1372. aesenc xmm8, xmm7
  1373. aesenc xmm9, xmm7
  1374. aesenc xmm10, xmm7
  1375. aesenc xmm11, xmm7
  1376. aesenc xmm12, xmm7
  1377. aesenc xmm13, xmm7
  1378. aesenc xmm14, xmm7
  1379. aesenc xmm15, xmm7
  1380. movdqa xmm7, OWORD PTR [r15+176]
  1381. aesenc xmm8, xmm7
  1382. aesenc xmm9, xmm7
  1383. aesenc xmm10, xmm7
  1384. aesenc xmm11, xmm7
  1385. aesenc xmm12, xmm7
  1386. aesenc xmm13, xmm7
  1387. aesenc xmm14, xmm7
  1388. aesenc xmm15, xmm7
  1389. cmp r10d, 13
  1390. movdqa xmm7, OWORD PTR [r15+192]
  1391. jl L_AES_GCM_encrypt_aesni_aesenc_128_ghash_avx_done
  1392. aesenc xmm8, xmm7
  1393. aesenc xmm9, xmm7
  1394. aesenc xmm10, xmm7
  1395. aesenc xmm11, xmm7
  1396. aesenc xmm12, xmm7
  1397. aesenc xmm13, xmm7
  1398. aesenc xmm14, xmm7
  1399. aesenc xmm15, xmm7
  1400. movdqa xmm7, OWORD PTR [r15+208]
  1401. aesenc xmm8, xmm7
  1402. aesenc xmm9, xmm7
  1403. aesenc xmm10, xmm7
  1404. aesenc xmm11, xmm7
  1405. aesenc xmm12, xmm7
  1406. aesenc xmm13, xmm7
  1407. aesenc xmm14, xmm7
  1408. aesenc xmm15, xmm7
  1409. movdqa xmm7, OWORD PTR [r15+224]
  1410. L_AES_GCM_encrypt_aesni_aesenc_128_ghash_avx_done:
  1411. aesenclast xmm8, xmm7
  1412. aesenclast xmm9, xmm7
  1413. movdqu xmm0, [rcx]
  1414. movdqu xmm1, [rcx+16]
  1415. pxor xmm8, xmm0
  1416. pxor xmm9, xmm1
  1417. movdqu [rdx], xmm8
  1418. movdqu [rdx+16], xmm9
  1419. aesenclast xmm10, xmm7
  1420. aesenclast xmm11, xmm7
  1421. movdqu xmm0, [rcx+32]
  1422. movdqu xmm1, [rcx+48]
  1423. pxor xmm10, xmm0
  1424. pxor xmm11, xmm1
  1425. movdqu [rdx+32], xmm10
  1426. movdqu [rdx+48], xmm11
  1427. aesenclast xmm12, xmm7
  1428. aesenclast xmm13, xmm7
  1429. movdqu xmm0, [rcx+64]
  1430. movdqu xmm1, [rcx+80]
  1431. pxor xmm12, xmm0
  1432. pxor xmm13, xmm1
  1433. movdqu [rdx+64], xmm12
  1434. movdqu [rdx+80], xmm13
  1435. aesenclast xmm14, xmm7
  1436. aesenclast xmm15, xmm7
  1437. movdqu xmm0, [rcx+96]
  1438. movdqu xmm1, [rcx+112]
  1439. pxor xmm14, xmm0
  1440. pxor xmm15, xmm1
  1441. movdqu [rdx+96], xmm14
  1442. movdqu [rdx+112], xmm15
  1443. add ebx, 128
  1444. cmp ebx, r13d
  1445. jl L_AES_GCM_encrypt_aesni_ghash_128
  1446. L_AES_GCM_encrypt_aesni_end_128:
  1447. movdqa xmm4, OWORD PTR L_aes_gcm_bswap_mask
  1448. pshufb xmm8, xmm4
  1449. pshufb xmm9, xmm4
  1450. pshufb xmm10, xmm4
  1451. pshufb xmm11, xmm4
  1452. pxor xmm8, xmm2
  1453. pshufb xmm12, xmm4
  1454. pshufb xmm13, xmm4
  1455. pshufb xmm14, xmm4
  1456. pshufb xmm15, xmm4
  1457. movdqu xmm7, [rsp+112]
  1458. pshufd xmm1, xmm8, 78
  1459. pshufd xmm2, xmm7, 78
  1460. movdqa xmm3, xmm7
  1461. movdqa xmm0, xmm7
  1462. pclmulqdq xmm3, xmm8, 17
  1463. pclmulqdq xmm0, xmm8, 0
  1464. pxor xmm1, xmm8
  1465. pxor xmm2, xmm7
  1466. pclmulqdq xmm1, xmm2, 0
  1467. pxor xmm1, xmm0
  1468. pxor xmm1, xmm3
  1469. movdqa xmm2, xmm1
  1470. movdqa xmm4, xmm0
  1471. movdqa xmm6, xmm3
  1472. pslldq xmm2, 8
  1473. psrldq xmm1, 8
  1474. pxor xmm4, xmm2
  1475. pxor xmm6, xmm1
  1476. movdqu xmm7, [rsp+96]
  1477. pshufd xmm1, xmm9, 78
  1478. pshufd xmm2, xmm7, 78
  1479. movdqa xmm3, xmm7
  1480. movdqa xmm0, xmm7
  1481. pclmulqdq xmm3, xmm9, 17
  1482. pclmulqdq xmm0, xmm9, 0
  1483. pxor xmm1, xmm9
  1484. pxor xmm2, xmm7
  1485. pclmulqdq xmm1, xmm2, 0
  1486. pxor xmm1, xmm0
  1487. pxor xmm1, xmm3
  1488. movdqa xmm2, xmm1
  1489. pxor xmm4, xmm0
  1490. pxor xmm6, xmm3
  1491. pslldq xmm2, 8
  1492. psrldq xmm1, 8
  1493. pxor xmm4, xmm2
  1494. pxor xmm6, xmm1
  1495. movdqu xmm7, [rsp+80]
  1496. pshufd xmm1, xmm10, 78
  1497. pshufd xmm2, xmm7, 78
  1498. movdqa xmm3, xmm7
  1499. movdqa xmm0, xmm7
  1500. pclmulqdq xmm3, xmm10, 17
  1501. pclmulqdq xmm0, xmm10, 0
  1502. pxor xmm1, xmm10
  1503. pxor xmm2, xmm7
  1504. pclmulqdq xmm1, xmm2, 0
  1505. pxor xmm1, xmm0
  1506. pxor xmm1, xmm3
  1507. movdqa xmm2, xmm1
  1508. pxor xmm4, xmm0
  1509. pxor xmm6, xmm3
  1510. pslldq xmm2, 8
  1511. psrldq xmm1, 8
  1512. pxor xmm4, xmm2
  1513. pxor xmm6, xmm1
  1514. movdqu xmm7, [rsp+64]
  1515. pshufd xmm1, xmm11, 78
  1516. pshufd xmm2, xmm7, 78
  1517. movdqa xmm3, xmm7
  1518. movdqa xmm0, xmm7
  1519. pclmulqdq xmm3, xmm11, 17
  1520. pclmulqdq xmm0, xmm11, 0
  1521. pxor xmm1, xmm11
  1522. pxor xmm2, xmm7
  1523. pclmulqdq xmm1, xmm2, 0
  1524. pxor xmm1, xmm0
  1525. pxor xmm1, xmm3
  1526. movdqa xmm2, xmm1
  1527. pxor xmm4, xmm0
  1528. pxor xmm6, xmm3
  1529. pslldq xmm2, 8
  1530. psrldq xmm1, 8
  1531. pxor xmm4, xmm2
  1532. pxor xmm6, xmm1
  1533. movdqu xmm7, [rsp+48]
  1534. pshufd xmm1, xmm12, 78
  1535. pshufd xmm2, xmm7, 78
  1536. movdqa xmm3, xmm7
  1537. movdqa xmm0, xmm7
  1538. pclmulqdq xmm3, xmm12, 17
  1539. pclmulqdq xmm0, xmm12, 0
  1540. pxor xmm1, xmm12
  1541. pxor xmm2, xmm7
  1542. pclmulqdq xmm1, xmm2, 0
  1543. pxor xmm1, xmm0
  1544. pxor xmm1, xmm3
  1545. movdqa xmm2, xmm1
  1546. pxor xmm4, xmm0
  1547. pxor xmm6, xmm3
  1548. pslldq xmm2, 8
  1549. psrldq xmm1, 8
  1550. pxor xmm4, xmm2
  1551. pxor xmm6, xmm1
  1552. movdqu xmm7, [rsp+32]
  1553. pshufd xmm1, xmm13, 78
  1554. pshufd xmm2, xmm7, 78
  1555. movdqa xmm3, xmm7
  1556. movdqa xmm0, xmm7
  1557. pclmulqdq xmm3, xmm13, 17
  1558. pclmulqdq xmm0, xmm13, 0
  1559. pxor xmm1, xmm13
  1560. pxor xmm2, xmm7
  1561. pclmulqdq xmm1, xmm2, 0
  1562. pxor xmm1, xmm0
  1563. pxor xmm1, xmm3
  1564. movdqa xmm2, xmm1
  1565. pxor xmm4, xmm0
  1566. pxor xmm6, xmm3
  1567. pslldq xmm2, 8
  1568. psrldq xmm1, 8
  1569. pxor xmm4, xmm2
  1570. pxor xmm6, xmm1
  1571. movdqu xmm7, [rsp+16]
  1572. pshufd xmm1, xmm14, 78
  1573. pshufd xmm2, xmm7, 78
  1574. movdqa xmm3, xmm7
  1575. movdqa xmm0, xmm7
  1576. pclmulqdq xmm3, xmm14, 17
  1577. pclmulqdq xmm0, xmm14, 0
  1578. pxor xmm1, xmm14
  1579. pxor xmm2, xmm7
  1580. pclmulqdq xmm1, xmm2, 0
  1581. pxor xmm1, xmm0
  1582. pxor xmm1, xmm3
  1583. movdqa xmm2, xmm1
  1584. pxor xmm4, xmm0
  1585. pxor xmm6, xmm3
  1586. pslldq xmm2, 8
  1587. psrldq xmm1, 8
  1588. pxor xmm4, xmm2
  1589. pxor xmm6, xmm1
  1590. movdqu xmm7, [rsp]
  1591. pshufd xmm1, xmm15, 78
  1592. pshufd xmm2, xmm7, 78
  1593. movdqa xmm3, xmm7
  1594. movdqa xmm0, xmm7
  1595. pclmulqdq xmm3, xmm15, 17
  1596. pclmulqdq xmm0, xmm15, 0
  1597. pxor xmm1, xmm15
  1598. pxor xmm2, xmm7
  1599. pclmulqdq xmm1, xmm2, 0
  1600. pxor xmm1, xmm0
  1601. pxor xmm1, xmm3
  1602. movdqa xmm2, xmm1
  1603. pxor xmm4, xmm0
  1604. pxor xmm6, xmm3
  1605. pslldq xmm2, 8
  1606. psrldq xmm1, 8
  1607. pxor xmm4, xmm2
  1608. pxor xmm6, xmm1
  1609. movdqa xmm0, xmm4
  1610. movdqa xmm1, xmm4
  1611. movdqa xmm2, xmm4
  1612. pslld xmm0, 31
  1613. pslld xmm1, 30
  1614. pslld xmm2, 25
  1615. pxor xmm0, xmm1
  1616. pxor xmm0, xmm2
  1617. movdqa xmm1, xmm0
  1618. psrldq xmm1, 4
  1619. pslldq xmm0, 12
  1620. pxor xmm4, xmm0
  1621. movdqa xmm2, xmm4
  1622. movdqa xmm3, xmm4
  1623. movdqa xmm0, xmm4
  1624. psrld xmm2, 1
  1625. psrld xmm3, 2
  1626. psrld xmm0, 7
  1627. pxor xmm2, xmm3
  1628. pxor xmm2, xmm0
  1629. pxor xmm2, xmm1
  1630. pxor xmm2, xmm4
  1631. pxor xmm6, xmm2
  1632. movdqu xmm5, [rsp]
  1633. L_AES_GCM_encrypt_aesni_done_128:
  1634. mov edx, r9d
  1635. cmp ebx, edx
  1636. jge L_AES_GCM_encrypt_aesni_done_enc
  1637. mov r13d, r9d
  1638. and r13d, 4294967280
  1639. cmp ebx, r13d
  1640. jge L_AES_GCM_encrypt_aesni_last_block_done
  1641. lea rcx, QWORD PTR [rdi+rbx]
  1642. lea rdx, QWORD PTR [rsi+rbx]
  1643. movdqu xmm8, [rsp+128]
  1644. movdqa xmm9, xmm8
  1645. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  1646. paddd xmm9, OWORD PTR L_aes_gcm_one
  1647. pxor xmm8, [r15]
  1648. movdqu [rsp+128], xmm9
  1649. aesenc xmm8, [r15+16]
  1650. aesenc xmm8, [r15+32]
  1651. aesenc xmm8, [r15+48]
  1652. aesenc xmm8, [r15+64]
  1653. aesenc xmm8, [r15+80]
  1654. aesenc xmm8, [r15+96]
  1655. aesenc xmm8, [r15+112]
  1656. aesenc xmm8, [r15+128]
  1657. aesenc xmm8, [r15+144]
  1658. cmp r10d, 11
  1659. movdqa xmm9, OWORD PTR [r15+160]
  1660. jl L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last
  1661. aesenc xmm8, xmm9
  1662. aesenc xmm8, [r15+176]
  1663. cmp r10d, 13
  1664. movdqa xmm9, OWORD PTR [r15+192]
  1665. jl L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last
  1666. aesenc xmm8, xmm9
  1667. aesenc xmm8, [r15+208]
  1668. movdqa xmm9, OWORD PTR [r15+224]
  1669. L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last:
  1670. aesenclast xmm8, xmm9
  1671. movdqu xmm9, [rcx]
  1672. pxor xmm8, xmm9
  1673. movdqu [rdx], xmm8
  1674. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  1675. pxor xmm6, xmm8
  1676. add ebx, 16
  1677. cmp ebx, r13d
  1678. jge L_AES_GCM_encrypt_aesni_last_block_ghash
  1679. L_AES_GCM_encrypt_aesni_last_block_start:
  1680. lea rcx, QWORD PTR [rdi+rbx]
  1681. lea rdx, QWORD PTR [rsi+rbx]
  1682. movdqu xmm8, [rsp+128]
  1683. movdqa xmm9, xmm8
  1684. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  1685. paddd xmm9, OWORD PTR L_aes_gcm_one
  1686. pxor xmm8, [r15]
  1687. movdqu [rsp+128], xmm9
  1688. movdqa xmm10, xmm6
  1689. pclmulqdq xmm10, xmm5, 16
  1690. aesenc xmm8, [r15+16]
  1691. aesenc xmm8, [r15+32]
  1692. movdqa xmm11, xmm6
  1693. pclmulqdq xmm11, xmm5, 1
  1694. aesenc xmm8, [r15+48]
  1695. aesenc xmm8, [r15+64]
  1696. movdqa xmm12, xmm6
  1697. pclmulqdq xmm12, xmm5, 0
  1698. aesenc xmm8, [r15+80]
  1699. movdqa xmm1, xmm6
  1700. pclmulqdq xmm1, xmm5, 17
  1701. aesenc xmm8, [r15+96]
  1702. pxor xmm10, xmm11
  1703. movdqa xmm2, xmm10
  1704. psrldq xmm10, 8
  1705. pslldq xmm2, 8
  1706. aesenc xmm8, [r15+112]
  1707. movdqa xmm3, xmm1
  1708. pxor xmm2, xmm12
  1709. pxor xmm3, xmm10
  1710. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  1711. movdqa xmm11, xmm2
  1712. pclmulqdq xmm11, xmm0, 16
  1713. aesenc xmm8, [r15+128]
  1714. pshufd xmm10, xmm2, 78
  1715. pxor xmm10, xmm11
  1716. movdqa xmm11, xmm10
  1717. pclmulqdq xmm11, xmm0, 16
  1718. aesenc xmm8, [r15+144]
  1719. pshufd xmm6, xmm10, 78
  1720. pxor xmm6, xmm11
  1721. pxor xmm6, xmm3
  1722. cmp r10d, 11
  1723. movdqa xmm9, OWORD PTR [r15+160]
  1724. jl L_AES_GCM_encrypt_aesni_aesenc_gfmul_last
  1725. aesenc xmm8, xmm9
  1726. aesenc xmm8, [r15+176]
  1727. cmp r10d, 13
  1728. movdqa xmm9, OWORD PTR [r15+192]
  1729. jl L_AES_GCM_encrypt_aesni_aesenc_gfmul_last
  1730. aesenc xmm8, xmm9
  1731. aesenc xmm8, [r15+208]
  1732. movdqa xmm9, OWORD PTR [r15+224]
  1733. L_AES_GCM_encrypt_aesni_aesenc_gfmul_last:
  1734. aesenclast xmm8, xmm9
  1735. movdqu xmm9, [rcx]
  1736. pxor xmm8, xmm9
  1737. movdqu [rdx], xmm8
  1738. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  1739. pxor xmm6, xmm8
  1740. add ebx, 16
  1741. cmp ebx, r13d
  1742. jl L_AES_GCM_encrypt_aesni_last_block_start
  1743. L_AES_GCM_encrypt_aesni_last_block_ghash:
  1744. pshufd xmm9, xmm5, 78
  1745. pshufd xmm10, xmm6, 78
  1746. movdqa xmm11, xmm6
  1747. movdqa xmm8, xmm6
  1748. pclmulqdq xmm11, xmm5, 17
  1749. pclmulqdq xmm8, xmm5, 0
  1750. pxor xmm9, xmm5
  1751. pxor xmm10, xmm6
  1752. pclmulqdq xmm9, xmm10, 0
  1753. pxor xmm9, xmm8
  1754. pxor xmm9, xmm11
  1755. movdqa xmm10, xmm9
  1756. movdqa xmm6, xmm11
  1757. pslldq xmm10, 8
  1758. psrldq xmm9, 8
  1759. pxor xmm8, xmm10
  1760. pxor xmm6, xmm9
  1761. movdqa xmm12, xmm8
  1762. movdqa xmm13, xmm8
  1763. movdqa xmm14, xmm8
  1764. pslld xmm12, 31
  1765. pslld xmm13, 30
  1766. pslld xmm14, 25
  1767. pxor xmm12, xmm13
  1768. pxor xmm12, xmm14
  1769. movdqa xmm13, xmm12
  1770. psrldq xmm13, 4
  1771. pslldq xmm12, 12
  1772. pxor xmm8, xmm12
  1773. movdqa xmm14, xmm8
  1774. movdqa xmm10, xmm8
  1775. movdqa xmm9, xmm8
  1776. psrld xmm14, 1
  1777. psrld xmm10, 2
  1778. psrld xmm9, 7
  1779. pxor xmm14, xmm10
  1780. pxor xmm14, xmm9
  1781. pxor xmm14, xmm13
  1782. pxor xmm14, xmm8
  1783. pxor xmm6, xmm14
  1784. L_AES_GCM_encrypt_aesni_last_block_done:
  1785. mov ecx, r9d
  1786. mov edx, ecx
  1787. and ecx, 15
  1788. jz L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_done
  1789. movdqu xmm4, [rsp+128]
  1790. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  1791. pxor xmm4, [r15]
  1792. aesenc xmm4, [r15+16]
  1793. aesenc xmm4, [r15+32]
  1794. aesenc xmm4, [r15+48]
  1795. aesenc xmm4, [r15+64]
  1796. aesenc xmm4, [r15+80]
  1797. aesenc xmm4, [r15+96]
  1798. aesenc xmm4, [r15+112]
  1799. aesenc xmm4, [r15+128]
  1800. aesenc xmm4, [r15+144]
  1801. cmp r10d, 11
  1802. movdqa xmm9, OWORD PTR [r15+160]
  1803. jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last
  1804. aesenc xmm4, xmm9
  1805. aesenc xmm4, [r15+176]
  1806. cmp r10d, 13
  1807. movdqa xmm9, OWORD PTR [r15+192]
  1808. jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last
  1809. aesenc xmm4, xmm9
  1810. aesenc xmm4, [r15+208]
  1811. movdqa xmm9, OWORD PTR [r15+224]
  1812. L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last:
  1813. aesenclast xmm4, xmm9
  1814. sub rsp, 16
  1815. xor ecx, ecx
  1816. movdqu [rsp], xmm4
  1817. L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_loop:
  1818. movzx r13d, BYTE PTR [rdi+rbx]
  1819. xor r13b, BYTE PTR [rsp+rcx]
  1820. mov BYTE PTR [rsi+rbx], r13b
  1821. mov BYTE PTR [rsp+rcx], r13b
  1822. inc ebx
  1823. inc ecx
  1824. cmp ebx, edx
  1825. jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_loop
  1826. xor r13, r13
  1827. cmp ecx, 16
  1828. je L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_finish_enc
  1829. L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_byte_loop:
  1830. mov BYTE PTR [rsp+rcx], r13b
  1831. inc ecx
  1832. cmp ecx, 16
  1833. jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_byte_loop
  1834. L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_finish_enc:
  1835. movdqu xmm4, [rsp]
  1836. add rsp, 16
  1837. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  1838. pxor xmm6, xmm4
  1839. pshufd xmm9, xmm5, 78
  1840. pshufd xmm10, xmm6, 78
  1841. movdqa xmm11, xmm6
  1842. movdqa xmm8, xmm6
  1843. pclmulqdq xmm11, xmm5, 17
  1844. pclmulqdq xmm8, xmm5, 0
  1845. pxor xmm9, xmm5
  1846. pxor xmm10, xmm6
  1847. pclmulqdq xmm9, xmm10, 0
  1848. pxor xmm9, xmm8
  1849. pxor xmm9, xmm11
  1850. movdqa xmm10, xmm9
  1851. movdqa xmm6, xmm11
  1852. pslldq xmm10, 8
  1853. psrldq xmm9, 8
  1854. pxor xmm8, xmm10
  1855. pxor xmm6, xmm9
  1856. movdqa xmm12, xmm8
  1857. movdqa xmm13, xmm8
  1858. movdqa xmm14, xmm8
  1859. pslld xmm12, 31
  1860. pslld xmm13, 30
  1861. pslld xmm14, 25
  1862. pxor xmm12, xmm13
  1863. pxor xmm12, xmm14
  1864. movdqa xmm13, xmm12
  1865. psrldq xmm13, 4
  1866. pslldq xmm12, 12
  1867. pxor xmm8, xmm12
  1868. movdqa xmm14, xmm8
  1869. movdqa xmm10, xmm8
  1870. movdqa xmm9, xmm8
  1871. psrld xmm14, 1
  1872. psrld xmm10, 2
  1873. psrld xmm9, 7
  1874. pxor xmm14, xmm10
  1875. pxor xmm14, xmm9
  1876. pxor xmm14, xmm13
  1877. pxor xmm14, xmm8
  1878. pxor xmm6, xmm14
  1879. L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_done:
  1880. L_AES_GCM_encrypt_aesni_done_enc:
  1881. mov edx, r9d
  1882. mov ecx, r11d
  1883. shl rdx, 3
  1884. shl rcx, 3
  1885. pinsrq xmm0, rdx, 0
  1886. pinsrq xmm0, rcx, 1
  1887. pxor xmm6, xmm0
  1888. pshufd xmm9, xmm5, 78
  1889. pshufd xmm10, xmm6, 78
  1890. movdqa xmm11, xmm6
  1891. movdqa xmm8, xmm6
  1892. pclmulqdq xmm11, xmm5, 17
  1893. pclmulqdq xmm8, xmm5, 0
  1894. pxor xmm9, xmm5
  1895. pxor xmm10, xmm6
  1896. pclmulqdq xmm9, xmm10, 0
  1897. pxor xmm9, xmm8
  1898. pxor xmm9, xmm11
  1899. movdqa xmm10, xmm9
  1900. movdqa xmm6, xmm11
  1901. pslldq xmm10, 8
  1902. psrldq xmm9, 8
  1903. pxor xmm8, xmm10
  1904. pxor xmm6, xmm9
  1905. movdqa xmm12, xmm8
  1906. movdqa xmm13, xmm8
  1907. movdqa xmm14, xmm8
  1908. pslld xmm12, 31
  1909. pslld xmm13, 30
  1910. pslld xmm14, 25
  1911. pxor xmm12, xmm13
  1912. pxor xmm12, xmm14
  1913. movdqa xmm13, xmm12
  1914. psrldq xmm13, 4
  1915. pslldq xmm12, 12
  1916. pxor xmm8, xmm12
  1917. movdqa xmm14, xmm8
  1918. movdqa xmm10, xmm8
  1919. movdqa xmm9, xmm8
  1920. psrld xmm14, 1
  1921. psrld xmm10, 2
  1922. psrld xmm9, 7
  1923. pxor xmm14, xmm10
  1924. pxor xmm14, xmm9
  1925. pxor xmm14, xmm13
  1926. pxor xmm14, xmm8
  1927. pxor xmm6, xmm14
  1928. pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
  1929. movdqu xmm0, [rsp+144]
  1930. pxor xmm0, xmm6
  1931. cmp r14d, 16
  1932. je L_AES_GCM_encrypt_aesni_store_tag_16
  1933. xor rcx, rcx
  1934. movdqu [rsp], xmm0
  1935. L_AES_GCM_encrypt_aesni_store_tag_loop:
  1936. movzx r13d, BYTE PTR [rsp+rcx]
  1937. mov BYTE PTR [r8+rcx], r13b
  1938. inc ecx
  1939. cmp ecx, r14d
  1940. jne L_AES_GCM_encrypt_aesni_store_tag_loop
  1941. jmp L_AES_GCM_encrypt_aesni_store_tag_done
  1942. L_AES_GCM_encrypt_aesni_store_tag_16:
  1943. movdqu [r8], xmm0
  1944. L_AES_GCM_encrypt_aesni_store_tag_done:
  1945. movdqu xmm6, [rsp+160]
  1946. movdqu xmm7, [rsp+176]
  1947. movdqu xmm8, [rsp+192]
  1948. movdqu xmm9, [rsp+208]
  1949. movdqu xmm10, [rsp+224]
  1950. movdqu xmm11, [rsp+240]
  1951. movdqu xmm12, [rsp+256]
  1952. movdqu xmm13, [rsp+272]
  1953. movdqu xmm14, [rsp+288]
  1954. movdqu xmm15, [rsp+304]
  1955. add rsp, 320
  1956. pop r15
  1957. pop r14
  1958. pop rbx
  1959. pop r12
  1960. pop rsi
  1961. pop rdi
  1962. pop r13
  1963. ret
  1964. AES_GCM_encrypt_aesni ENDP
  1965. _text ENDS
  1966. _text SEGMENT READONLY PARA
  1967. AES_GCM_decrypt_aesni PROC
  1968. push r13
  1969. push rdi
  1970. push rsi
  1971. push r12
  1972. push rbx
  1973. push r14
  1974. push r15
  1975. push rbp
  1976. mov rdi, rcx
  1977. mov rsi, rdx
  1978. mov r12, r8
  1979. mov rax, r9
  1980. mov r8, QWORD PTR [rsp+104]
  1981. mov r9d, DWORD PTR [rsp+112]
  1982. mov r11d, DWORD PTR [rsp+120]
  1983. mov ebx, DWORD PTR [rsp+128]
  1984. mov r14d, DWORD PTR [rsp+136]
  1985. mov r15, QWORD PTR [rsp+144]
  1986. mov r10d, DWORD PTR [rsp+152]
  1987. mov rbp, QWORD PTR [rsp+160]
  1988. sub rsp, 328
  1989. movdqu [rsp+168], xmm6
  1990. movdqu [rsp+184], xmm7
  1991. movdqu [rsp+200], xmm8
  1992. movdqu [rsp+216], xmm9
  1993. movdqu [rsp+232], xmm10
  1994. movdqu [rsp+248], xmm11
  1995. movdqu [rsp+264], xmm12
  1996. movdqu [rsp+280], xmm13
  1997. movdqu [rsp+296], xmm14
  1998. movdqu [rsp+312], xmm15
  1999. pxor xmm4, xmm4
  2000. pxor xmm6, xmm6
  2001. cmp ebx, 12
  2002. mov edx, ebx
  2003. jne L_AES_GCM_decrypt_aesni_iv_not_12
  2004. ; Calculate values when IV is 12 bytes
  2005. ; Set counter based on IV
  2006. mov ecx, 16777216
  2007. pinsrq xmm4, QWORD PTR [rax], 0
  2008. pinsrd xmm4, DWORD PTR [rax+8], 2
  2009. pinsrd xmm4, ecx, 3
  2010. ; H = Encrypt X(=0) and T = Encrypt counter
  2011. movdqa xmm1, xmm4
  2012. movdqa xmm5, OWORD PTR [r15]
  2013. pxor xmm1, xmm5
  2014. movdqa xmm7, OWORD PTR [r15+16]
  2015. aesenc xmm5, xmm7
  2016. aesenc xmm1, xmm7
  2017. movdqa xmm7, OWORD PTR [r15+32]
  2018. aesenc xmm5, xmm7
  2019. aesenc xmm1, xmm7
  2020. movdqa xmm7, OWORD PTR [r15+48]
  2021. aesenc xmm5, xmm7
  2022. aesenc xmm1, xmm7
  2023. movdqa xmm7, OWORD PTR [r15+64]
  2024. aesenc xmm5, xmm7
  2025. aesenc xmm1, xmm7
  2026. movdqa xmm7, OWORD PTR [r15+80]
  2027. aesenc xmm5, xmm7
  2028. aesenc xmm1, xmm7
  2029. movdqa xmm7, OWORD PTR [r15+96]
  2030. aesenc xmm5, xmm7
  2031. aesenc xmm1, xmm7
  2032. movdqa xmm7, OWORD PTR [r15+112]
  2033. aesenc xmm5, xmm7
  2034. aesenc xmm1, xmm7
  2035. movdqa xmm7, OWORD PTR [r15+128]
  2036. aesenc xmm5, xmm7
  2037. aesenc xmm1, xmm7
  2038. movdqa xmm7, OWORD PTR [r15+144]
  2039. aesenc xmm5, xmm7
  2040. aesenc xmm1, xmm7
  2041. cmp r10d, 11
  2042. movdqa xmm7, OWORD PTR [r15+160]
  2043. jl L_AES_GCM_decrypt_aesni_calc_iv_12_last
  2044. aesenc xmm5, xmm7
  2045. aesenc xmm1, xmm7
  2046. movdqa xmm7, OWORD PTR [r15+176]
  2047. aesenc xmm5, xmm7
  2048. aesenc xmm1, xmm7
  2049. cmp r10d, 13
  2050. movdqa xmm7, OWORD PTR [r15+192]
  2051. jl L_AES_GCM_decrypt_aesni_calc_iv_12_last
  2052. aesenc xmm5, xmm7
  2053. aesenc xmm1, xmm7
  2054. movdqa xmm7, OWORD PTR [r15+208]
  2055. aesenc xmm5, xmm7
  2056. aesenc xmm1, xmm7
  2057. movdqa xmm7, OWORD PTR [r15+224]
  2058. L_AES_GCM_decrypt_aesni_calc_iv_12_last:
  2059. aesenclast xmm5, xmm7
  2060. aesenclast xmm1, xmm7
  2061. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  2062. movdqu [rsp+144], xmm1
  2063. jmp L_AES_GCM_decrypt_aesni_iv_done
  2064. L_AES_GCM_decrypt_aesni_iv_not_12:
  2065. ; Calculate values when IV is not 12 bytes
  2066. ; H = Encrypt X(=0)
  2067. movdqa xmm5, OWORD PTR [r15]
  2068. aesenc xmm5, [r15+16]
  2069. aesenc xmm5, [r15+32]
  2070. aesenc xmm5, [r15+48]
  2071. aesenc xmm5, [r15+64]
  2072. aesenc xmm5, [r15+80]
  2073. aesenc xmm5, [r15+96]
  2074. aesenc xmm5, [r15+112]
  2075. aesenc xmm5, [r15+128]
  2076. aesenc xmm5, [r15+144]
  2077. cmp r10d, 11
  2078. movdqa xmm9, OWORD PTR [r15+160]
  2079. jl L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last
  2080. aesenc xmm5, xmm9
  2081. aesenc xmm5, [r15+176]
  2082. cmp r10d, 13
  2083. movdqa xmm9, OWORD PTR [r15+192]
  2084. jl L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last
  2085. aesenc xmm5, xmm9
  2086. aesenc xmm5, [r15+208]
  2087. movdqa xmm9, OWORD PTR [r15+224]
  2088. L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last:
  2089. aesenclast xmm5, xmm9
  2090. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  2091. ; Calc counter
  2092. ; Initialization vector
  2093. cmp edx, 0
  2094. mov rcx, 0
  2095. je L_AES_GCM_decrypt_aesni_calc_iv_done
  2096. cmp edx, 16
  2097. jl L_AES_GCM_decrypt_aesni_calc_iv_lt16
  2098. and edx, 4294967280
  ; Hash one full 16-byte IV block per iteration: xmm4 = (xmm4 XOR block) times H (xmm5).
  2099. L_AES_GCM_decrypt_aesni_calc_iv_16_loop:
  2100. movdqu xmm8, [rax+rcx]
  2101. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2102. pxor xmm4, xmm8
  ; Carry-less multiply of xmm4 by xmm5 with three pclmulqdq (high halves, low halves,
  ; XOR-of-halves); the 256-bit product ends up as xmm7 (low) : xmm4 (high).
  2103. pshufd xmm1, xmm4, 78
  2104. pshufd xmm2, xmm5, 78
  2105. movdqa xmm3, xmm5
  2106. movdqa xmm0, xmm5
  2107. pclmulqdq xmm3, xmm4, 17
  2108. pclmulqdq xmm0, xmm4, 0
  2109. pxor xmm1, xmm4
  2110. pxor xmm2, xmm5
  2111. pclmulqdq xmm1, xmm2, 0
  2112. pxor xmm1, xmm0
  2113. pxor xmm1, xmm3
  2114. movdqa xmm2, xmm1
  2115. movdqa xmm7, xmm0
  2116. movdqa xmm4, xmm3
  2117. pslldq xmm2, 8
  2118. psrldq xmm1, 8
  2119. pxor xmm7, xmm2
  2120. pxor xmm4, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords and from xmm7 into xmm4.
  2121. movdqa xmm0, xmm7
  2122. movdqa xmm1, xmm4
  2123. psrld xmm0, 31
  2124. psrld xmm1, 31
  2125. pslld xmm7, 1
  2126. pslld xmm4, 1
  2127. movdqa xmm2, xmm0
  2128. pslldq xmm0, 4
  2129. psrldq xmm2, 12
  2130. pslldq xmm1, 4
  2131. por xmm4, xmm2
  2132. por xmm7, xmm0
  2133. por xmm4, xmm1
  ; Reduce to 128 bits: fold xmm7 into xmm4 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2134. movdqa xmm0, xmm7
  2135. movdqa xmm1, xmm7
  2136. movdqa xmm2, xmm7
  2137. pslld xmm0, 31
  2138. pslld xmm1, 30
  2139. pslld xmm2, 25
  2140. pxor xmm0, xmm1
  2141. pxor xmm0, xmm2
  2142. movdqa xmm1, xmm0
  2143. psrldq xmm1, 4
  2144. pslldq xmm0, 12
  2145. pxor xmm7, xmm0
  2146. movdqa xmm2, xmm7
  2147. movdqa xmm3, xmm7
  2148. movdqa xmm0, xmm7
  2149. psrld xmm2, 1
  2150. psrld xmm3, 2
  2151. psrld xmm0, 7
  2152. pxor xmm2, xmm3
  2153. pxor xmm2, xmm0
  2154. pxor xmm2, xmm1
  2155. pxor xmm2, xmm7
  2156. pxor xmm4, xmm2
  ; Advance to the next block; edx is the length rounded down to a multiple of 16.
  2157. add ecx, 16
  2158. cmp ecx, edx
  2159. jl L_AES_GCM_decrypt_aesni_calc_iv_16_loop
  ; Reload the length from ebx and finish if no partial block remains.
  ; NOTE(review): ebx presumably holds the unrounded IV length, saved before this point - confirm.
  2160. mov edx, ebx
  2161. cmp ecx, edx
  2162. je L_AES_GCM_decrypt_aesni_calc_iv_done
  ; Hash the trailing IV bytes (from offset rcx up to edx) as one zero-padded 16-byte block.
  2163. L_AES_GCM_decrypt_aesni_calc_iv_lt16:
  ; Reserve a temporary 16-byte stack buffer and zero it; ebx is reused as the write index.
  2164. sub rsp, 16
  2165. pxor xmm8, xmm8
  2166. xor ebx, ebx
  2167. movdqu [rsp], xmm8
  ; Copy one byte per iteration until ecx reaches edx; bytes not written stay zero.
  2168. L_AES_GCM_decrypt_aesni_calc_iv_loop:
  2169. movzx r13d, BYTE PTR [rax+rcx]
  2170. mov BYTE PTR [rsp+rbx], r13b
  2171. inc ecx
  2172. inc ebx
  2173. cmp ecx, edx
  2174. jl L_AES_GCM_decrypt_aesni_calc_iv_loop
  ; Load the padded block, release the buffer, and XOR the byte-reversed block into xmm4.
  2175. movdqu xmm8, [rsp]
  2176. add rsp, 16
  2177. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2178. pxor xmm4, xmm8
  ; Carry-less multiply of xmm4 by xmm5 with three pclmulqdq (high halves, low halves,
  ; XOR-of-halves); the 256-bit product ends up as xmm7 (low) : xmm4 (high).
  2179. pshufd xmm1, xmm4, 78
  2180. pshufd xmm2, xmm5, 78
  2181. movdqa xmm3, xmm5
  2182. movdqa xmm0, xmm5
  2183. pclmulqdq xmm3, xmm4, 17
  2184. pclmulqdq xmm0, xmm4, 0
  2185. pxor xmm1, xmm4
  2186. pxor xmm2, xmm5
  2187. pclmulqdq xmm1, xmm2, 0
  2188. pxor xmm1, xmm0
  2189. pxor xmm1, xmm3
  2190. movdqa xmm2, xmm1
  2191. movdqa xmm7, xmm0
  2192. movdqa xmm4, xmm3
  2193. pslldq xmm2, 8
  2194. psrldq xmm1, 8
  2195. pxor xmm7, xmm2
  2196. pxor xmm4, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords and from xmm7 into xmm4.
  2197. movdqa xmm0, xmm7
  2198. movdqa xmm1, xmm4
  2199. psrld xmm0, 31
  2200. psrld xmm1, 31
  2201. pslld xmm7, 1
  2202. pslld xmm4, 1
  2203. movdqa xmm2, xmm0
  2204. pslldq xmm0, 4
  2205. psrldq xmm2, 12
  2206. pslldq xmm1, 4
  2207. por xmm4, xmm2
  2208. por xmm7, xmm0
  2209. por xmm4, xmm1
  ; Reduce to 128 bits: fold xmm7 into xmm4 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2210. movdqa xmm0, xmm7
  2211. movdqa xmm1, xmm7
  2212. movdqa xmm2, xmm7
  2213. pslld xmm0, 31
  2214. pslld xmm1, 30
  2215. pslld xmm2, 25
  2216. pxor xmm0, xmm1
  2217. pxor xmm0, xmm2
  2218. movdqa xmm1, xmm0
  2219. psrldq xmm1, 4
  2220. pslldq xmm0, 12
  2221. pxor xmm7, xmm0
  2222. movdqa xmm2, xmm7
  2223. movdqa xmm3, xmm7
  2224. movdqa xmm0, xmm7
  2225. psrld xmm2, 1
  2226. psrld xmm3, 2
  2227. psrld xmm0, 7
  2228. pxor xmm2, xmm3
  2229. pxor xmm2, xmm0
  2230. pxor xmm2, xmm1
  2231. pxor xmm2, xmm7
  2232. pxor xmm4, xmm2
  ; Finish hashing the IV with a length block, then encrypt the resulting counter block.
  2233. L_AES_GCM_decrypt_aesni_calc_iv_done:
  2234. ; T = Encrypt counter
  ; edx (IV length in bytes) times 8 gives the bit length. The 32-bit shl zero-extends into rdx,
  ; so pinsrq places that bit count in the low qword of the zeroed xmm0 before it is XORed into xmm4.
  2235. pxor xmm0, xmm0
  2236. shl edx, 3
  2237. pinsrq xmm0, rdx, 0
  2238. pxor xmm4, xmm0
  ; Carry-less multiply of xmm4 by xmm5 with three pclmulqdq (high halves, low halves,
  ; XOR-of-halves); the 256-bit product ends up as xmm7 (low) : xmm4 (high).
  2239. pshufd xmm1, xmm4, 78
  2240. pshufd xmm2, xmm5, 78
  2241. movdqa xmm3, xmm5
  2242. movdqa xmm0, xmm5
  2243. pclmulqdq xmm3, xmm4, 17
  2244. pclmulqdq xmm0, xmm4, 0
  2245. pxor xmm1, xmm4
  2246. pxor xmm2, xmm5
  2247. pclmulqdq xmm1, xmm2, 0
  2248. pxor xmm1, xmm0
  2249. pxor xmm1, xmm3
  2250. movdqa xmm2, xmm1
  2251. movdqa xmm7, xmm0
  2252. movdqa xmm4, xmm3
  2253. pslldq xmm2, 8
  2254. psrldq xmm1, 8
  2255. pxor xmm7, xmm2
  2256. pxor xmm4, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords and from xmm7 into xmm4.
  2257. movdqa xmm0, xmm7
  2258. movdqa xmm1, xmm4
  2259. psrld xmm0, 31
  2260. psrld xmm1, 31
  2261. pslld xmm7, 1
  2262. pslld xmm4, 1
  2263. movdqa xmm2, xmm0
  2264. pslldq xmm0, 4
  2265. psrldq xmm2, 12
  2266. pslldq xmm1, 4
  2267. por xmm4, xmm2
  2268. por xmm7, xmm0
  2269. por xmm4, xmm1
  ; Reduce to 128 bits: fold xmm7 into xmm4 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2270. movdqa xmm0, xmm7
  2271. movdqa xmm1, xmm7
  2272. movdqa xmm2, xmm7
  2273. pslld xmm0, 31
  2274. pslld xmm1, 30
  2275. pslld xmm2, 25
  2276. pxor xmm0, xmm1
  2277. pxor xmm0, xmm2
  2278. movdqa xmm1, xmm0
  2279. psrldq xmm1, 4
  2280. pslldq xmm0, 12
  2281. pxor xmm7, xmm0
  2282. movdqa xmm2, xmm7
  2283. movdqa xmm3, xmm7
  2284. movdqa xmm0, xmm7
  2285. psrld xmm2, 1
  2286. psrld xmm3, 2
  2287. psrld xmm0, 7
  2288. pxor xmm2, xmm3
  2289. pxor xmm2, xmm0
  2290. pxor xmm2, xmm1
  2291. pxor xmm2, xmm7
  2292. pxor xmm4, xmm2
  ; Byte-reverse the hash result; xmm4 now holds the counter block derived from the IV.
  2293. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  2294. ; Encrypt counter
  ; xmm8 = counter block XOR round key 0, then the AES rounds; xmm4 itself is left unchanged.
  2295. movdqa xmm8, OWORD PTR [r15]
  2296. pxor xmm8, xmm4
  2297. aesenc xmm8, [r15+16]
  2298. aesenc xmm8, [r15+32]
  2299. aesenc xmm8, [r15+48]
  2300. aesenc xmm8, [r15+64]
  2301. aesenc xmm8, [r15+80]
  2302. aesenc xmm8, [r15+96]
  2303. aesenc xmm8, [r15+112]
  2304. aesenc xmm8, [r15+128]
  2305. aesenc xmm8, [r15+144]
  ; Extra rounds run only when r10d >= 11 (keys at +160/+176) and r10d >= 13 (keys at +192/+208).
  2306. cmp r10d, 11
  2307. movdqa xmm9, OWORD PTR [r15+160]
  2308. jl L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last
  2309. aesenc xmm8, xmm9
  2310. aesenc xmm8, [r15+176]
  2311. cmp r10d, 13
  2312. movdqa xmm9, OWORD PTR [r15+192]
  2313. jl L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last
  2314. aesenc xmm8, xmm9
  2315. aesenc xmm8, [r15+208]
  2316. movdqa xmm9, OWORD PTR [r15+224]
  2317. L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last:
  2318. aesenclast xmm8, xmm9
  ; Save the encrypted counter block in the stack slot at rsp+144 (the slot the 12-byte-IV path also fills).
  2319. movdqu [rsp+144], xmm8
  ; Both IV paths join here; choose how to hash the additional authentication data.
  2320. L_AES_GCM_decrypt_aesni_iv_done:
  2321. ; Additional authentication data
  ; edx = AAD length taken from r11d; a zero length skips AAD hashing entirely.
  2322. mov edx, r11d
  2323. cmp edx, 0
  2324. je L_AES_GCM_decrypt_aesni_calc_aad_done
  ; ecx = byte offset into the AAD. Fewer than 16 bytes goes straight to the partial-block path.
  2325. xor ecx, ecx
  2326. cmp edx, 16
  2327. jl L_AES_GCM_decrypt_aesni_calc_aad_lt16
  ; Round the length down to a multiple of 16 (4294967280 = 0FFFFFFF0h) for the whole-block loop.
  2328. and edx, 4294967280
  ; Hash one full 16-byte AAD block per iteration: xmm6 = (xmm6 XOR block) times H (xmm5).
  ; The AAD is read through r12 with rcx as the byte offset.
  ; NOTE(review): the initial value of xmm6 is set before this point - confirm.
  2329. L_AES_GCM_decrypt_aesni_calc_aad_16_loop:
  2330. movdqu xmm8, [r12+rcx]
  2331. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2332. pxor xmm6, xmm8
  ; Carry-less multiply of xmm6 by xmm5 with three pclmulqdq (high halves, low halves,
  ; XOR-of-halves); the 256-bit product ends up as xmm7 (low) : xmm6 (high).
  2333. pshufd xmm1, xmm6, 78
  2334. pshufd xmm2, xmm5, 78
  2335. movdqa xmm3, xmm5
  2336. movdqa xmm0, xmm5
  2337. pclmulqdq xmm3, xmm6, 17
  2338. pclmulqdq xmm0, xmm6, 0
  2339. pxor xmm1, xmm6
  2340. pxor xmm2, xmm5
  2341. pclmulqdq xmm1, xmm2, 0
  2342. pxor xmm1, xmm0
  2343. pxor xmm1, xmm3
  2344. movdqa xmm2, xmm1
  2345. movdqa xmm7, xmm0
  2346. movdqa xmm6, xmm3
  2347. pslldq xmm2, 8
  2348. psrldq xmm1, 8
  2349. pxor xmm7, xmm2
  2350. pxor xmm6, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords and from xmm7 into xmm6.
  2351. movdqa xmm0, xmm7
  2352. movdqa xmm1, xmm6
  2353. psrld xmm0, 31
  2354. psrld xmm1, 31
  2355. pslld xmm7, 1
  2356. pslld xmm6, 1
  2357. movdqa xmm2, xmm0
  2358. pslldq xmm0, 4
  2359. psrldq xmm2, 12
  2360. pslldq xmm1, 4
  2361. por xmm6, xmm2
  2362. por xmm7, xmm0
  2363. por xmm6, xmm1
  ; Reduce to 128 bits: fold xmm7 into xmm6 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2364. movdqa xmm0, xmm7
  2365. movdqa xmm1, xmm7
  2366. movdqa xmm2, xmm7
  2367. pslld xmm0, 31
  2368. pslld xmm1, 30
  2369. pslld xmm2, 25
  2370. pxor xmm0, xmm1
  2371. pxor xmm0, xmm2
  2372. movdqa xmm1, xmm0
  2373. psrldq xmm1, 4
  2374. pslldq xmm0, 12
  2375. pxor xmm7, xmm0
  2376. movdqa xmm2, xmm7
  2377. movdqa xmm3, xmm7
  2378. movdqa xmm0, xmm7
  2379. psrld xmm2, 1
  2380. psrld xmm3, 2
  2381. psrld xmm0, 7
  2382. pxor xmm2, xmm3
  2383. pxor xmm2, xmm0
  2384. pxor xmm2, xmm1
  2385. pxor xmm2, xmm7
  2386. pxor xmm6, xmm2
  ; Advance to the next block; edx is the AAD length rounded down to a multiple of 16.
  2387. add ecx, 16
  2388. cmp ecx, edx
  2389. jl L_AES_GCM_decrypt_aesni_calc_aad_16_loop
  ; Restore the full AAD length from r11d and finish if no partial block remains.
  2390. mov edx, r11d
  2391. cmp ecx, edx
  2392. je L_AES_GCM_decrypt_aesni_calc_aad_done
  ; Hash the trailing AAD bytes (from offset rcx up to edx) as one zero-padded 16-byte block.
  2393. L_AES_GCM_decrypt_aesni_calc_aad_lt16:
  ; Reserve a temporary 16-byte stack buffer and zero it; ebx is the write index into it.
  2394. sub rsp, 16
  2395. pxor xmm8, xmm8
  2396. xor ebx, ebx
  2397. movdqu [rsp], xmm8
  ; Copy one byte per iteration until ecx reaches edx; bytes not written stay zero.
  2398. L_AES_GCM_decrypt_aesni_calc_aad_loop:
  2399. movzx r13d, BYTE PTR [r12+rcx]
  2400. mov BYTE PTR [rsp+rbx], r13b
  2401. inc ecx
  2402. inc ebx
  2403. cmp ecx, edx
  2404. jl L_AES_GCM_decrypt_aesni_calc_aad_loop
  ; Load the padded block, release the buffer, and XOR the byte-reversed block into xmm6.
  2405. movdqu xmm8, [rsp]
  2406. add rsp, 16
  2407. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2408. pxor xmm6, xmm8
  ; Carry-less multiply of xmm6 by xmm5 with three pclmulqdq (high halves, low halves,
  ; XOR-of-halves); the 256-bit product ends up as xmm7 (low) : xmm6 (high).
  2409. pshufd xmm1, xmm6, 78
  2410. pshufd xmm2, xmm5, 78
  2411. movdqa xmm3, xmm5
  2412. movdqa xmm0, xmm5
  2413. pclmulqdq xmm3, xmm6, 17
  2414. pclmulqdq xmm0, xmm6, 0
  2415. pxor xmm1, xmm6
  2416. pxor xmm2, xmm5
  2417. pclmulqdq xmm1, xmm2, 0
  2418. pxor xmm1, xmm0
  2419. pxor xmm1, xmm3
  2420. movdqa xmm2, xmm1
  2421. movdqa xmm7, xmm0
  2422. movdqa xmm6, xmm3
  2423. pslldq xmm2, 8
  2424. psrldq xmm1, 8
  2425. pxor xmm7, xmm2
  2426. pxor xmm6, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords and from xmm7 into xmm6.
  2427. movdqa xmm0, xmm7
  2428. movdqa xmm1, xmm6
  2429. psrld xmm0, 31
  2430. psrld xmm1, 31
  2431. pslld xmm7, 1
  2432. pslld xmm6, 1
  2433. movdqa xmm2, xmm0
  2434. pslldq xmm0, 4
  2435. psrldq xmm2, 12
  2436. pslldq xmm1, 4
  2437. por xmm6, xmm2
  2438. por xmm7, xmm0
  2439. por xmm6, xmm1
  ; Reduce to 128 bits: fold xmm7 into xmm6 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2440. movdqa xmm0, xmm7
  2441. movdqa xmm1, xmm7
  2442. movdqa xmm2, xmm7
  2443. pslld xmm0, 31
  2444. pslld xmm1, 30
  2445. pslld xmm2, 25
  2446. pxor xmm0, xmm1
  2447. pxor xmm0, xmm2
  2448. movdqa xmm1, xmm0
  2449. psrldq xmm1, 4
  2450. pslldq xmm0, 12
  2451. pxor xmm7, xmm0
  2452. movdqa xmm2, xmm7
  2453. movdqa xmm3, xmm7
  2454. movdqa xmm0, xmm7
  2455. psrld xmm2, 1
  2456. psrld xmm3, 2
  2457. psrld xmm0, 7
  2458. pxor xmm2, xmm3
  2459. pxor xmm2, xmm0
  2460. pxor xmm2, xmm1
  2461. pxor xmm2, xmm7
  2462. pxor xmm6, xmm2
  2463. L_AES_GCM_decrypt_aesni_calc_aad_done:
  2464. ; Calculate counter and H
  ; Counter: shuffle xmm4 with the bswap_epi64 mask, add the L_aes_gcm_one constant with a
  ; per-dword add, and keep the running counter in the stack slot at rsp+128.
  ; H: the instructions interleaved with the counter update build xmm8 = xmm5 shifted left by
  ; one bit across all 128 bits (psllq 1, plus the bit carried from the low qword to the high).
  2465. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  2466. movdqa xmm9, xmm5
  2467. paddd xmm4, OWORD PTR L_aes_gcm_one
  2468. movdqa xmm8, xmm5
  2469. movdqu [rsp+128], xmm4
  2470. psrlq xmm9, 63
  2471. psllq xmm8, 1
  2472. pslldq xmm9, 8
  2473. por xmm8, xmm9
  ; Broadcast the top dword of H and arithmetic-shift it by 31 to get an all-ones or all-zeros
  ; mask from H's top bit; L_aes_gcm_mod2_128 is XORed into the shifted H only when that bit was set.
  2474. pshufd xmm5, xmm5, 255
  2475. psrad xmm5, 31
  2476. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  2477. pxor xmm5, xmm8
  ; ebx = number of bytes processed so far. With fewer than 128 bytes (r9d) the 8-block loop
  ; and the table of powers of H are skipped.
  2478. xor ebx, ebx
  2479. cmp r9d, 128
  2480. mov r13d, r9d
  2481. jl L_AES_GCM_decrypt_aesni_done_128
  ; r13d = length rounded down to a multiple of 128 (4294967168 = 0FFFFFF80h).
  2482. and r13d, 4294967168
  ; The 128-byte loop carries the running hash in xmm2 instead of xmm6.
  2483. movdqa xmm2, xmm6
  2484. ; H ^ 1
  ; The adjusted H goes in the first 16-byte table slot at rsp.
  2485. movdqu [rsp], xmm5
  2486. ; H ^ 2
  ; Square xmm5 (the adjusted H): three pclmulqdq (high halves, low halves, XOR-of-halves)
  ; build the 256-bit product as xmm8 (low) : xmm0 (high).
  2487. pshufd xmm9, xmm5, 78
  2488. pshufd xmm10, xmm5, 78
  2489. movdqa xmm11, xmm5
  2490. movdqa xmm8, xmm5
  2491. pclmulqdq xmm11, xmm5, 17
  2492. pclmulqdq xmm8, xmm5, 0
  2493. pxor xmm9, xmm5
  2494. pxor xmm10, xmm5
  2495. pclmulqdq xmm9, xmm10, 0
  2496. pxor xmm9, xmm8
  2497. pxor xmm9, xmm11
  2498. movdqa xmm10, xmm9
  2499. movdqa xmm0, xmm11
  2500. pslldq xmm10, 8
  2501. psrldq xmm9, 8
  2502. pxor xmm8, xmm10
  2503. pxor xmm0, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm0 using left shifts by 31/30/25 and right shifts by 1/2/7.
  ; Unlike the IV/AAD multiplies, there is no separate one-bit shift of the product here.
  2504. movdqa xmm12, xmm8
  2505. movdqa xmm13, xmm8
  2506. movdqa xmm14, xmm8
  2507. pslld xmm12, 31
  2508. pslld xmm13, 30
  2509. pslld xmm14, 25
  2510. pxor xmm12, xmm13
  2511. pxor xmm12, xmm14
  2512. movdqa xmm13, xmm12
  2513. psrldq xmm13, 4
  2514. pslldq xmm12, 12
  2515. pxor xmm8, xmm12
  2516. movdqa xmm14, xmm8
  2517. movdqa xmm10, xmm8
  2518. movdqa xmm9, xmm8
  2519. psrld xmm14, 1
  2520. psrld xmm10, 2
  2521. psrld xmm9, 7
  2522. pxor xmm14, xmm10
  2523. pxor xmm14, xmm9
  2524. pxor xmm14, xmm13
  2525. pxor xmm14, xmm8
  2526. pxor xmm0, xmm14
  ; Result stays in xmm0 (reused by later powers) and is stored in the table slot at rsp+16.
  2527. movdqu [rsp+16], xmm0
  2528. ; H ^ 3
  ; Multiply xmm0 (H^2 from the section above) by xmm5 (H): three pclmulqdq build the
  ; 256-bit product as xmm8 (low) : xmm1 (high).
  2529. pshufd xmm9, xmm5, 78
  2530. pshufd xmm10, xmm0, 78
  2531. movdqa xmm11, xmm0
  2532. movdqa xmm8, xmm0
  2533. pclmulqdq xmm11, xmm5, 17
  2534. pclmulqdq xmm8, xmm5, 0
  2535. pxor xmm9, xmm5
  2536. pxor xmm10, xmm0
  2537. pclmulqdq xmm9, xmm10, 0
  2538. pxor xmm9, xmm8
  2539. pxor xmm9, xmm11
  2540. movdqa xmm10, xmm9
  2541. movdqa xmm1, xmm11
  2542. pslldq xmm10, 8
  2543. psrldq xmm9, 8
  2544. pxor xmm8, xmm10
  2545. pxor xmm1, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm1 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2546. movdqa xmm12, xmm8
  2547. movdqa xmm13, xmm8
  2548. movdqa xmm14, xmm8
  2549. pslld xmm12, 31
  2550. pslld xmm13, 30
  2551. pslld xmm14, 25
  2552. pxor xmm12, xmm13
  2553. pxor xmm12, xmm14
  2554. movdqa xmm13, xmm12
  2555. psrldq xmm13, 4
  2556. pslldq xmm12, 12
  2557. pxor xmm8, xmm12
  2558. movdqa xmm14, xmm8
  2559. movdqa xmm10, xmm8
  2560. movdqa xmm9, xmm8
  2561. psrld xmm14, 1
  2562. psrld xmm10, 2
  2563. psrld xmm9, 7
  2564. pxor xmm14, xmm10
  2565. pxor xmm14, xmm9
  2566. pxor xmm14, xmm13
  2567. pxor xmm14, xmm8
  2568. pxor xmm1, xmm14
  ; Result stays in xmm1 (reused by later powers) and is stored in the table slot at rsp+32.
  2569. movdqu [rsp+32], xmm1
  2570. ; H ^ 4
  ; Square xmm0 (H^2): three pclmulqdq build the 256-bit product as xmm8 (low) : xmm3 (high).
  2571. pshufd xmm9, xmm0, 78
  2572. pshufd xmm10, xmm0, 78
  2573. movdqa xmm11, xmm0
  2574. movdqa xmm8, xmm0
  2575. pclmulqdq xmm11, xmm0, 17
  2576. pclmulqdq xmm8, xmm0, 0
  2577. pxor xmm9, xmm0
  2578. pxor xmm10, xmm0
  2579. pclmulqdq xmm9, xmm10, 0
  2580. pxor xmm9, xmm8
  2581. pxor xmm9, xmm11
  2582. movdqa xmm10, xmm9
  2583. movdqa xmm3, xmm11
  2584. pslldq xmm10, 8
  2585. psrldq xmm9, 8
  2586. pxor xmm8, xmm10
  2587. pxor xmm3, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm3 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2588. movdqa xmm12, xmm8
  2589. movdqa xmm13, xmm8
  2590. movdqa xmm14, xmm8
  2591. pslld xmm12, 31
  2592. pslld xmm13, 30
  2593. pslld xmm14, 25
  2594. pxor xmm12, xmm13
  2595. pxor xmm12, xmm14
  2596. movdqa xmm13, xmm12
  2597. psrldq xmm13, 4
  2598. pslldq xmm12, 12
  2599. pxor xmm8, xmm12
  2600. movdqa xmm14, xmm8
  2601. movdqa xmm10, xmm8
  2602. movdqa xmm9, xmm8
  2603. psrld xmm14, 1
  2604. psrld xmm10, 2
  2605. psrld xmm9, 7
  2606. pxor xmm14, xmm10
  2607. pxor xmm14, xmm9
  2608. pxor xmm14, xmm13
  2609. pxor xmm14, xmm8
  2610. pxor xmm3, xmm14
  ; Result stays in xmm3 (reused by later powers) and is stored in the table slot at rsp+48.
  2611. movdqu [rsp+48], xmm3
  2612. ; H ^ 5
  ; Multiply xmm1 (H^3) by xmm0 (H^2): three pclmulqdq build the 256-bit product as
  ; xmm8 (low) : xmm7 (high).
  2613. pshufd xmm9, xmm0, 78
  2614. pshufd xmm10, xmm1, 78
  2615. movdqa xmm11, xmm1
  2616. movdqa xmm8, xmm1
  2617. pclmulqdq xmm11, xmm0, 17
  2618. pclmulqdq xmm8, xmm0, 0
  2619. pxor xmm9, xmm0
  2620. pxor xmm10, xmm1
  2621. pclmulqdq xmm9, xmm10, 0
  2622. pxor xmm9, xmm8
  2623. pxor xmm9, xmm11
  2624. movdqa xmm10, xmm9
  2625. movdqa xmm7, xmm11
  2626. pslldq xmm10, 8
  2627. psrldq xmm9, 8
  2628. pxor xmm8, xmm10
  2629. pxor xmm7, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm7 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2630. movdqa xmm12, xmm8
  2631. movdqa xmm13, xmm8
  2632. movdqa xmm14, xmm8
  2633. pslld xmm12, 31
  2634. pslld xmm13, 30
  2635. pslld xmm14, 25
  2636. pxor xmm12, xmm13
  2637. pxor xmm12, xmm14
  2638. movdqa xmm13, xmm12
  2639. psrldq xmm13, 4
  2640. pslldq xmm12, 12
  2641. pxor xmm8, xmm12
  2642. movdqa xmm14, xmm8
  2643. movdqa xmm10, xmm8
  2644. movdqa xmm9, xmm8
  2645. psrld xmm14, 1
  2646. psrld xmm10, 2
  2647. psrld xmm9, 7
  2648. pxor xmm14, xmm10
  2649. pxor xmm14, xmm9
  2650. pxor xmm14, xmm13
  2651. pxor xmm14, xmm8
  2652. pxor xmm7, xmm14
  ; Stored in the table slot at rsp+64; xmm7 is overwritten by the next section.
  2653. movdqu [rsp+64], xmm7
  2654. ; H ^ 6
  ; Square xmm1 (H^3): three pclmulqdq build the 256-bit product as xmm8 (low) : xmm7 (high).
  2655. pshufd xmm9, xmm1, 78
  2656. pshufd xmm10, xmm1, 78
  2657. movdqa xmm11, xmm1
  2658. movdqa xmm8, xmm1
  2659. pclmulqdq xmm11, xmm1, 17
  2660. pclmulqdq xmm8, xmm1, 0
  2661. pxor xmm9, xmm1
  2662. pxor xmm10, xmm1
  2663. pclmulqdq xmm9, xmm10, 0
  2664. pxor xmm9, xmm8
  2665. pxor xmm9, xmm11
  2666. movdqa xmm10, xmm9
  2667. movdqa xmm7, xmm11
  2668. pslldq xmm10, 8
  2669. psrldq xmm9, 8
  2670. pxor xmm8, xmm10
  2671. pxor xmm7, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm7 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2672. movdqa xmm12, xmm8
  2673. movdqa xmm13, xmm8
  2674. movdqa xmm14, xmm8
  2675. pslld xmm12, 31
  2676. pslld xmm13, 30
  2677. pslld xmm14, 25
  2678. pxor xmm12, xmm13
  2679. pxor xmm12, xmm14
  2680. movdqa xmm13, xmm12
  2681. psrldq xmm13, 4
  2682. pslldq xmm12, 12
  2683. pxor xmm8, xmm12
  2684. movdqa xmm14, xmm8
  2685. movdqa xmm10, xmm8
  2686. movdqa xmm9, xmm8
  2687. psrld xmm14, 1
  2688. psrld xmm10, 2
  2689. psrld xmm9, 7
  2690. pxor xmm14, xmm10
  2691. pxor xmm14, xmm9
  2692. pxor xmm14, xmm13
  2693. pxor xmm14, xmm8
  2694. pxor xmm7, xmm14
  ; Stored in the table slot at rsp+80; xmm7 is overwritten by the next section.
  2695. movdqu [rsp+80], xmm7
  2696. ; H ^ 7
  ; Multiply xmm3 (H^4) by xmm1 (H^3): three pclmulqdq build the 256-bit product as
  ; xmm8 (low) : xmm7 (high).
  2697. pshufd xmm9, xmm1, 78
  2698. pshufd xmm10, xmm3, 78
  2699. movdqa xmm11, xmm3
  2700. movdqa xmm8, xmm3
  2701. pclmulqdq xmm11, xmm1, 17
  2702. pclmulqdq xmm8, xmm1, 0
  2703. pxor xmm9, xmm1
  2704. pxor xmm10, xmm3
  2705. pclmulqdq xmm9, xmm10, 0
  2706. pxor xmm9, xmm8
  2707. pxor xmm9, xmm11
  2708. movdqa xmm10, xmm9
  2709. movdqa xmm7, xmm11
  2710. pslldq xmm10, 8
  2711. psrldq xmm9, 8
  2712. pxor xmm8, xmm10
  2713. pxor xmm7, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm7 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2714. movdqa xmm12, xmm8
  2715. movdqa xmm13, xmm8
  2716. movdqa xmm14, xmm8
  2717. pslld xmm12, 31
  2718. pslld xmm13, 30
  2719. pslld xmm14, 25
  2720. pxor xmm12, xmm13
  2721. pxor xmm12, xmm14
  2722. movdqa xmm13, xmm12
  2723. psrldq xmm13, 4
  2724. pslldq xmm12, 12
  2725. pxor xmm8, xmm12
  2726. movdqa xmm14, xmm8
  2727. movdqa xmm10, xmm8
  2728. movdqa xmm9, xmm8
  2729. psrld xmm14, 1
  2730. psrld xmm10, 2
  2731. psrld xmm9, 7
  2732. pxor xmm14, xmm10
  2733. pxor xmm14, xmm9
  2734. pxor xmm14, xmm13
  2735. pxor xmm14, xmm8
  2736. pxor xmm7, xmm14
  ; Stored in the table slot at rsp+96; xmm7 is overwritten by the next section.
  2737. movdqu [rsp+96], xmm7
  2738. ; H ^ 8
  ; Square xmm3 (H^4): three pclmulqdq build the 256-bit product as xmm8 (low) : xmm7 (high).
  2739. pshufd xmm9, xmm3, 78
  2740. pshufd xmm10, xmm3, 78
  2741. movdqa xmm11, xmm3
  2742. movdqa xmm8, xmm3
  2743. pclmulqdq xmm11, xmm3, 17
  2744. pclmulqdq xmm8, xmm3, 0
  2745. pxor xmm9, xmm3
  2746. pxor xmm10, xmm3
  2747. pclmulqdq xmm9, xmm10, 0
  2748. pxor xmm9, xmm8
  2749. pxor xmm9, xmm11
  2750. movdqa xmm10, xmm9
  2751. movdqa xmm7, xmm11
  2752. pslldq xmm10, 8
  2753. psrldq xmm9, 8
  2754. pxor xmm8, xmm10
  2755. pxor xmm7, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm7 using left shifts by 31/30/25 and right shifts by 1/2/7.
  2756. movdqa xmm12, xmm8
  2757. movdqa xmm13, xmm8
  2758. movdqa xmm14, xmm8
  2759. pslld xmm12, 31
  2760. pslld xmm13, 30
  2761. pslld xmm14, 25
  2762. pxor xmm12, xmm13
  2763. pxor xmm12, xmm14
  2764. movdqa xmm13, xmm12
  2765. psrldq xmm13, 4
  2766. pslldq xmm12, 12
  2767. pxor xmm8, xmm12
  2768. movdqa xmm14, xmm8
  2769. movdqa xmm10, xmm8
  2770. movdqa xmm9, xmm8
  2771. psrld xmm14, 1
  2772. psrld xmm10, 2
  2773. psrld xmm9, 7
  2774. pxor xmm14, xmm10
  2775. pxor xmm14, xmm9
  2776. pxor xmm14, xmm13
  2777. pxor xmm14, xmm8
  2778. pxor xmm7, xmm14
  ; Stored in the last table slot at rsp+112, which the 128-byte loop reads first.
  2779. movdqu [rsp+112], xmm7
  ; Top of the 128-byte loop: process eight 16-byte blocks per iteration.
  ; rcx = input pointer (rdi + bytes done), rdx = output pointer (rsi + bytes done).
  2780. L_AES_GCM_decrypt_aesni_ghash_128:
  2781. lea rcx, QWORD PTR [rdi+rbx]
  2782. lea rdx, QWORD PTR [rsi+rbx]
  ; Build eight counter blocks in xmm8..xmm15: the stored counter plus the constants
  ; L_aes_gcm_one .. L_aes_gcm_seven (xmm8 uses the counter as is), each shuffled with the
  ; bswap_epi64 mask held in xmm1.
  2783. movdqu xmm8, [rsp+128]
  2784. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  2785. movdqa xmm0, xmm8
  2786. pshufb xmm8, xmm1
  2787. movdqa xmm9, xmm0
  2788. paddd xmm9, OWORD PTR L_aes_gcm_one
  2789. pshufb xmm9, xmm1
  2790. movdqa xmm10, xmm0
  2791. paddd xmm10, OWORD PTR L_aes_gcm_two
  2792. pshufb xmm10, xmm1
  2793. movdqa xmm11, xmm0
  2794. paddd xmm11, OWORD PTR L_aes_gcm_three
  2795. pshufb xmm11, xmm1
  2796. movdqa xmm12, xmm0
  2797. paddd xmm12, OWORD PTR L_aes_gcm_four
  2798. pshufb xmm12, xmm1
  2799. movdqa xmm13, xmm0
  2800. paddd xmm13, OWORD PTR L_aes_gcm_five
  2801. pshufb xmm13, xmm1
  2802. movdqa xmm14, xmm0
  2803. paddd xmm14, OWORD PTR L_aes_gcm_six
  2804. pshufb xmm14, xmm1
  2805. movdqa xmm15, xmm0
  2806. paddd xmm15, OWORD PTR L_aes_gcm_seven
  2807. pshufb xmm15, xmm1
  ; Advance the stored counter by L_aes_gcm_eight for the next iteration.
  2808. paddd xmm0, OWORD PTR L_aes_gcm_eight
  2809. movdqa xmm7, OWORD PTR [r15]
  2810. movdqu [rsp+128], xmm0
  ; XOR the round key at [r15] (round key 0) into all eight counter blocks.
  2811. pxor xmm8, xmm7
  2812. pxor xmm9, xmm7
  2813. pxor xmm10, xmm7
  2814. pxor xmm11, xmm7
  2815. pxor xmm12, xmm7
  2816. pxor xmm13, xmm7
  2817. pxor xmm14, xmm7
  2818. pxor xmm15, xmm7
  ; AES rounds 1-9 on the eight counter blocks (xmm8..xmm15), interleaved with the hash of the
  ; eight input blocks at rcx. Input block i is multiplied by the stored power of H at
  ; rsp+112-16*i, so the first block meets H^8 and the last meets H^1. Accumulators:
  ; xmm2 = low products, xmm3 = high products, xmm1 = middle terms XOR low XOR high.
  ; Input block 0 (XORed with the running hash in xmm2) times the power at rsp+112; AES round 1.
  2819. movdqu xmm7, [rsp+112]
  2820. movdqu xmm0, [rcx]
  2821. aesenc xmm8, [r15+16]
  2822. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2823. pxor xmm0, xmm2
  2824. pshufd xmm1, xmm7, 78
  2825. pshufd xmm5, xmm0, 78
  2826. pxor xmm1, xmm7
  2827. pxor xmm5, xmm0
  2828. movdqa xmm3, xmm0
  2829. pclmulqdq xmm3, xmm7, 17
  2830. aesenc xmm9, [r15+16]
  2831. aesenc xmm10, [r15+16]
  2832. movdqa xmm2, xmm0
  2833. pclmulqdq xmm2, xmm7, 0
  2834. aesenc xmm11, [r15+16]
  2835. aesenc xmm12, [r15+16]
  2836. pclmulqdq xmm1, xmm5, 0
  2837. aesenc xmm13, [r15+16]
  2838. aesenc xmm14, [r15+16]
  2839. aesenc xmm15, [r15+16]
  2840. pxor xmm1, xmm2
  2841. pxor xmm1, xmm3
  ; Input block 1 times the power at rsp+96; AES round 2.
  2842. movdqu xmm7, [rsp+96]
  2843. movdqu xmm0, [rcx+16]
  2844. pshufd xmm4, xmm7, 78
  2845. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2846. aesenc xmm8, [r15+32]
  2847. pxor xmm4, xmm7
  2848. pshufd xmm5, xmm0, 78
  2849. pxor xmm5, xmm0
  2850. movdqa xmm6, xmm0
  2851. pclmulqdq xmm6, xmm7, 17
  2852. aesenc xmm9, [r15+32]
  2853. aesenc xmm10, [r15+32]
  2854. pclmulqdq xmm7, xmm0, 0
  2855. aesenc xmm11, [r15+32]
  2856. aesenc xmm12, [r15+32]
  2857. pclmulqdq xmm4, xmm5, 0
  2858. aesenc xmm13, [r15+32]
  2859. aesenc xmm14, [r15+32]
  2860. aesenc xmm15, [r15+32]
  2861. pxor xmm1, xmm7
  2862. pxor xmm2, xmm7
  2863. pxor xmm1, xmm6
  2864. pxor xmm3, xmm6
  2865. pxor xmm1, xmm4
  ; Input block 2 times the power at rsp+80; AES round 3.
  2866. movdqu xmm7, [rsp+80]
  2867. movdqu xmm0, [rcx+32]
  2868. pshufd xmm4, xmm7, 78
  2869. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2870. aesenc xmm8, [r15+48]
  2871. pxor xmm4, xmm7
  2872. pshufd xmm5, xmm0, 78
  2873. pxor xmm5, xmm0
  2874. movdqa xmm6, xmm0
  2875. pclmulqdq xmm6, xmm7, 17
  2876. aesenc xmm9, [r15+48]
  2877. aesenc xmm10, [r15+48]
  2878. pclmulqdq xmm7, xmm0, 0
  2879. aesenc xmm11, [r15+48]
  2880. aesenc xmm12, [r15+48]
  2881. pclmulqdq xmm4, xmm5, 0
  2882. aesenc xmm13, [r15+48]
  2883. aesenc xmm14, [r15+48]
  2884. aesenc xmm15, [r15+48]
  2885. pxor xmm1, xmm7
  2886. pxor xmm2, xmm7
  2887. pxor xmm1, xmm6
  2888. pxor xmm3, xmm6
  2889. pxor xmm1, xmm4
  ; Input block 3 times the power at rsp+64; AES round 4.
  2890. movdqu xmm7, [rsp+64]
  2891. movdqu xmm0, [rcx+48]
  2892. pshufd xmm4, xmm7, 78
  2893. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2894. aesenc xmm8, [r15+64]
  2895. pxor xmm4, xmm7
  2896. pshufd xmm5, xmm0, 78
  2897. pxor xmm5, xmm0
  2898. movdqa xmm6, xmm0
  2899. pclmulqdq xmm6, xmm7, 17
  2900. aesenc xmm9, [r15+64]
  2901. aesenc xmm10, [r15+64]
  2902. pclmulqdq xmm7, xmm0, 0
  2903. aesenc xmm11, [r15+64]
  2904. aesenc xmm12, [r15+64]
  2905. pclmulqdq xmm4, xmm5, 0
  2906. aesenc xmm13, [r15+64]
  2907. aesenc xmm14, [r15+64]
  2908. aesenc xmm15, [r15+64]
  2909. pxor xmm1, xmm7
  2910. pxor xmm2, xmm7
  2911. pxor xmm1, xmm6
  2912. pxor xmm3, xmm6
  2913. pxor xmm1, xmm4
  ; Input block 4 times the power at rsp+48; AES round 5.
  2914. movdqu xmm7, [rsp+48]
  2915. movdqu xmm0, [rcx+64]
  2916. pshufd xmm4, xmm7, 78
  2917. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2918. aesenc xmm8, [r15+80]
  2919. pxor xmm4, xmm7
  2920. pshufd xmm5, xmm0, 78
  2921. pxor xmm5, xmm0
  2922. movdqa xmm6, xmm0
  2923. pclmulqdq xmm6, xmm7, 17
  2924. aesenc xmm9, [r15+80]
  2925. aesenc xmm10, [r15+80]
  2926. pclmulqdq xmm7, xmm0, 0
  2927. aesenc xmm11, [r15+80]
  2928. aesenc xmm12, [r15+80]
  2929. pclmulqdq xmm4, xmm5, 0
  2930. aesenc xmm13, [r15+80]
  2931. aesenc xmm14, [r15+80]
  2932. aesenc xmm15, [r15+80]
  2933. pxor xmm1, xmm7
  2934. pxor xmm2, xmm7
  2935. pxor xmm1, xmm6
  2936. pxor xmm3, xmm6
  2937. pxor xmm1, xmm4
  ; Input block 5 times the power at rsp+32; AES round 6.
  2938. movdqu xmm7, [rsp+32]
  2939. movdqu xmm0, [rcx+80]
  2940. pshufd xmm4, xmm7, 78
  2941. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2942. aesenc xmm8, [r15+96]
  2943. pxor xmm4, xmm7
  2944. pshufd xmm5, xmm0, 78
  2945. pxor xmm5, xmm0
  2946. movdqa xmm6, xmm0
  2947. pclmulqdq xmm6, xmm7, 17
  2948. aesenc xmm9, [r15+96]
  2949. aesenc xmm10, [r15+96]
  2950. pclmulqdq xmm7, xmm0, 0
  2951. aesenc xmm11, [r15+96]
  2952. aesenc xmm12, [r15+96]
  2953. pclmulqdq xmm4, xmm5, 0
  2954. aesenc xmm13, [r15+96]
  2955. aesenc xmm14, [r15+96]
  2956. aesenc xmm15, [r15+96]
  2957. pxor xmm1, xmm7
  2958. pxor xmm2, xmm7
  2959. pxor xmm1, xmm6
  2960. pxor xmm3, xmm6
  2961. pxor xmm1, xmm4
  ; Input block 6 times the power at rsp+16; AES round 7.
  2962. movdqu xmm7, [rsp+16]
  2963. movdqu xmm0, [rcx+96]
  2964. pshufd xmm4, xmm7, 78
  2965. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2966. aesenc xmm8, [r15+112]
  2967. pxor xmm4, xmm7
  2968. pshufd xmm5, xmm0, 78
  2969. pxor xmm5, xmm0
  2970. movdqa xmm6, xmm0
  2971. pclmulqdq xmm6, xmm7, 17
  2972. aesenc xmm9, [r15+112]
  2973. aesenc xmm10, [r15+112]
  2974. pclmulqdq xmm7, xmm0, 0
  2975. aesenc xmm11, [r15+112]
  2976. aesenc xmm12, [r15+112]
  2977. pclmulqdq xmm4, xmm5, 0
  2978. aesenc xmm13, [r15+112]
  2979. aesenc xmm14, [r15+112]
  2980. aesenc xmm15, [r15+112]
  2981. pxor xmm1, xmm7
  2982. pxor xmm2, xmm7
  2983. pxor xmm1, xmm6
  2984. pxor xmm3, xmm6
  2985. pxor xmm1, xmm4
  ; Input block 7 times the power at rsp (H^1); AES round 8.
  2986. movdqu xmm7, [rsp]
  2987. movdqu xmm0, [rcx+112]
  2988. pshufd xmm4, xmm7, 78
  2989. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2990. aesenc xmm8, [r15+128]
  2991. pxor xmm4, xmm7
  2992. pshufd xmm5, xmm0, 78
  2993. pxor xmm5, xmm0
  2994. movdqa xmm6, xmm0
  2995. pclmulqdq xmm6, xmm7, 17
  2996. aesenc xmm9, [r15+128]
  2997. aesenc xmm10, [r15+128]
  2998. pclmulqdq xmm7, xmm0, 0
  2999. aesenc xmm11, [r15+128]
  3000. aesenc xmm12, [r15+128]
  3001. pclmulqdq xmm4, xmm5, 0
  3002. aesenc xmm13, [r15+128]
  3003. aesenc xmm14, [r15+128]
  3004. aesenc xmm15, [r15+128]
  3005. pxor xmm1, xmm7
  3006. pxor xmm2, xmm7
  3007. pxor xmm1, xmm6
  3008. pxor xmm3, xmm6
  3009. pxor xmm1, xmm4
  ; Fold the middle terms (xmm1) into the low (xmm2) and high (xmm3) halves, then reduce xmm2
  ; with left shifts by 31/30/25 and right shifts by 1/2/7; AES round 9 is interleaved.
  ; The updated running hash ends up in xmm2. xmm5 and xmm6 are used as scratch in this loop.
  3010. movdqa xmm5, xmm1
  3011. psrldq xmm1, 8
  3012. pslldq xmm5, 8
  3013. aesenc xmm8, [r15+144]
  3014. pxor xmm2, xmm5
  3015. pxor xmm3, xmm1
  3016. movdqa xmm7, xmm2
  3017. movdqa xmm4, xmm2
  3018. movdqa xmm5, xmm2
  3019. aesenc xmm9, [r15+144]
  3020. pslld xmm7, 31
  3021. pslld xmm4, 30
  3022. pslld xmm5, 25
  3023. aesenc xmm10, [r15+144]
  3024. pxor xmm7, xmm4
  3025. pxor xmm7, xmm5
  3026. aesenc xmm11, [r15+144]
  3027. movdqa xmm4, xmm7
  3028. pslldq xmm7, 12
  3029. psrldq xmm4, 4
  3030. aesenc xmm12, [r15+144]
  3031. pxor xmm2, xmm7
  3032. movdqa xmm5, xmm2
  3033. movdqa xmm1, xmm2
  3034. movdqa xmm0, xmm2
  3035. aesenc xmm13, [r15+144]
  3036. psrld xmm5, 1
  3037. psrld xmm1, 2
  3038. psrld xmm0, 7
  3039. aesenc xmm14, [r15+144]
  3040. pxor xmm5, xmm1
  3041. pxor xmm5, xmm0
  3042. aesenc xmm15, [r15+144]
  3043. pxor xmm5, xmm4
  3044. pxor xmm2, xmm5
  3045. pxor xmm2, xmm3
  ; Key-size dependent rounds for the eight blocks: when r10d < 11 the key at r15+160 is already
  ; the final-round key. Otherwise rounds with the keys at +160 and +176 run, and when r10d >= 13
  ; also those at +192 and +208. xmm7 always leaves this section holding the final-round key.
  3046. cmp r10d, 11
  3047. movdqa xmm7, OWORD PTR [r15+160]
  3048. jl L_AES_GCM_decrypt_aesni_aesenc_128_ghash_avx_done
  3049. aesenc xmm8, xmm7
  3050. aesenc xmm9, xmm7
  3051. aesenc xmm10, xmm7
  3052. aesenc xmm11, xmm7
  3053. aesenc xmm12, xmm7
  3054. aesenc xmm13, xmm7
  3055. aesenc xmm14, xmm7
  3056. aesenc xmm15, xmm7
  3057. movdqa xmm7, OWORD PTR [r15+176]
  3058. aesenc xmm8, xmm7
  3059. aesenc xmm9, xmm7
  3060. aesenc xmm10, xmm7
  3061. aesenc xmm11, xmm7
  3062. aesenc xmm12, xmm7
  3063. aesenc xmm13, xmm7
  3064. aesenc xmm14, xmm7
  3065. aesenc xmm15, xmm7
  3066. cmp r10d, 13
  3067. movdqa xmm7, OWORD PTR [r15+192]
  3068. jl L_AES_GCM_decrypt_aesni_aesenc_128_ghash_avx_done
  3069. aesenc xmm8, xmm7
  3070. aesenc xmm9, xmm7
  3071. aesenc xmm10, xmm7
  3072. aesenc xmm11, xmm7
  3073. aesenc xmm12, xmm7
  3074. aesenc xmm13, xmm7
  3075. aesenc xmm14, xmm7
  3076. aesenc xmm15, xmm7
  3077. movdqa xmm7, OWORD PTR [r15+208]
  3078. aesenc xmm8, xmm7
  3079. aesenc xmm9, xmm7
  3080. aesenc xmm10, xmm7
  3081. aesenc xmm11, xmm7
  3082. aesenc xmm12, xmm7
  3083. aesenc xmm13, xmm7
  3084. aesenc xmm14, xmm7
  3085. aesenc xmm15, xmm7
  3086. movdqa xmm7, OWORD PTR [r15+224]
  ; Final AES round for the eight keystream blocks (last round key in xmm7), two at a time:
  ; XOR each with the matching 16 input bytes at rcx and store the result at rdx.
  3087. L_AES_GCM_decrypt_aesni_aesenc_128_ghash_avx_done:
  3088. aesenclast xmm8, xmm7
  3089. aesenclast xmm9, xmm7
  3090. movdqu xmm0, [rcx]
  3091. movdqu xmm1, [rcx+16]
  3092. pxor xmm8, xmm0
  3093. pxor xmm9, xmm1
  3094. movdqu [rdx], xmm8
  3095. movdqu [rdx+16], xmm9
  3096. aesenclast xmm10, xmm7
  3097. aesenclast xmm11, xmm7
  3098. movdqu xmm0, [rcx+32]
  3099. movdqu xmm1, [rcx+48]
  3100. pxor xmm10, xmm0
  3101. pxor xmm11, xmm1
  3102. movdqu [rdx+32], xmm10
  3103. movdqu [rdx+48], xmm11
  3104. aesenclast xmm12, xmm7
  3105. aesenclast xmm13, xmm7
  3106. movdqu xmm0, [rcx+64]
  3107. movdqu xmm1, [rcx+80]
  3108. pxor xmm12, xmm0
  3109. pxor xmm13, xmm1
  3110. movdqu [rdx+64], xmm12
  3111. movdqu [rdx+80], xmm13
  3112. aesenclast xmm14, xmm7
  3113. aesenclast xmm15, xmm7
  3114. movdqu xmm0, [rcx+96]
  3115. movdqu xmm1, [rcx+112]
  3116. pxor xmm14, xmm0
  3117. pxor xmm15, xmm1
  3118. movdqu [rdx+96], xmm14
  3119. movdqu [rdx+112], xmm15
  ; Advance by 128 bytes and repeat while ebx is below r13d (length rounded down to 128).
  3120. add ebx, 128
  3121. cmp ebx, r13d
  3122. jl L_AES_GCM_decrypt_aesni_ghash_128
  ; After the loop: move the running hash back to xmm6 and reload H from the table slot at rsp,
  ; because the loop body used xmm5 and xmm6 as scratch.
  3123. movdqa xmm6, xmm2
  3124. movdqu xmm5, [rsp]
  ; After the 128-byte loop (or when it was skipped): decide what is left to process.
  3125. L_AES_GCM_decrypt_aesni_done_128:
  ; Nothing left when the bytes done (ebx) have reached the total length in r9d.
  3126. mov edx, r9d
  3127. cmp ebx, edx
  3128. jge L_AES_GCM_decrypt_aesni_done_dec
  ; r13d = length rounded down to a multiple of 16 (4294967280 = 0FFFFFFF0h); if no whole
  ; 16-byte block remains, go straight to the partial-block handling.
  3129. mov r13d, r9d
  3130. and r13d, 4294967280
  3131. cmp ebx, r13d
  3132. jge L_AES_GCM_decrypt_aesni_last_block_done
  ; Process one whole 16-byte block per iteration: hash the input block into xmm6 while
  ; encrypting one counter block in xmm8, then XOR the keystream with the input and store it.
  3133. L_AES_GCM_decrypt_aesni_last_block_start:
  ; rcx = input pointer (rdi + bytes done), rdx = output pointer (rsi + bytes done).
  3134. lea rcx, QWORD PTR [rdi+rbx]
  3135. lea rdx, QWORD PTR [rsi+rbx]
  ; xmm1 = byte-reversed input block XOR running hash; xmm0 = H (copy of xmm5).
  3136. movdqu xmm1, [rcx]
  3137. movdqa xmm0, xmm5
  3138. pshufb xmm1, OWORD PTR L_aes_gcm_bswap_mask
  3139. pxor xmm1, xmm6
  ; xmm8 = current counter (shuffled, XORed with round key 0); the counter plus L_aes_gcm_one
  ; is written back to rsp+128 for the next block.
  3140. movdqu xmm8, [rsp+128]
  3141. movdqa xmm9, xmm8
  3142. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  3143. paddd xmm9, OWORD PTR L_aes_gcm_one
  3144. pxor xmm8, [r15]
  3145. movdqu [rsp+128], xmm9
  ; Four pclmulqdq partial products of xmm1 and xmm0 (the two cross terms with immediates 16
  ; and 1, low*low with 0, high*high with 17), interleaved with AES rounds on xmm8.
  3146. movdqa xmm10, xmm1
  3147. pclmulqdq xmm10, xmm0, 16
  3148. aesenc xmm8, [r15+16]
  3149. aesenc xmm8, [r15+32]
  3150. movdqa xmm11, xmm1
  3151. pclmulqdq xmm11, xmm0, 1
  3152. aesenc xmm8, [r15+48]
  3153. aesenc xmm8, [r15+64]
  3154. movdqa xmm12, xmm1
  3155. pclmulqdq xmm12, xmm0, 0
  3156. aesenc xmm8, [r15+80]
  ; The next movdqa copies xmm1 onto itself and has no effect on its value.
  3157. movdqa xmm1, xmm1
  3158. pclmulqdq xmm1, xmm0, 17
  3159. aesenc xmm8, [r15+96]
  ; Combine the cross terms and split them over the low half (xmm2) and high half (xmm3).
  3160. pxor xmm10, xmm11
  3161. movdqa xmm2, xmm10
  3162. psrldq xmm10, 8
  3163. pslldq xmm2, 8
  3164. aesenc xmm8, [r15+112]
  3165. movdqa xmm3, xmm1
  3166. pxor xmm2, xmm12
  3167. pxor xmm3, xmm10
  ; Reduce with two pclmulqdq by the L_aes_gcm_mod2_128 constant; the new running hash lands in xmm6.
  3168. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  3169. movdqa xmm11, xmm2
  3170. pclmulqdq xmm11, xmm0, 16
  3171. aesenc xmm8, [r15+128]
  3172. pshufd xmm10, xmm2, 78
  3173. pxor xmm10, xmm11
  3174. movdqa xmm11, xmm10
  3175. pclmulqdq xmm11, xmm0, 16
  3176. aesenc xmm8, [r15+144]
  3177. pshufd xmm6, xmm10, 78
  3178. pxor xmm6, xmm11
  3179. pxor xmm6, xmm3
  ; Extra rounds run only when r10d >= 11 (keys at +160/+176) and r10d >= 13 (keys at +192/+208);
  ; xmm9 is reused here to hold the key that feeds aesenclast.
  3180. cmp r10d, 11
  3181. movdqa xmm9, OWORD PTR [r15+160]
  3182. jl L_AES_GCM_decrypt_aesni_aesenc_gfmul_last
  3183. aesenc xmm8, xmm9
  3184. aesenc xmm8, [r15+176]
  3185. cmp r10d, 13
  3186. movdqa xmm9, OWORD PTR [r15+192]
  3187. jl L_AES_GCM_decrypt_aesni_aesenc_gfmul_last
  3188. aesenc xmm8, xmm9
  3189. aesenc xmm8, [r15+208]
  3190. movdqa xmm9, OWORD PTR [r15+224]
  3191. L_AES_GCM_decrypt_aesni_aesenc_gfmul_last:
  3192. aesenclast xmm8, xmm9
  ; Output block = keystream XOR input block.
  3193. movdqu xmm9, [rcx]
  3194. pxor xmm8, xmm9
  3195. movdqu [rdx], xmm8
  ; Advance by 16 bytes and repeat while ebx is below r13d (length rounded down to 16).
  3196. add ebx, 16
  3197. cmp ebx, r13d
  3198. jl L_AES_GCM_decrypt_aesni_last_block_start
  ; All whole 16-byte blocks are done; prepare for a trailing partial block, if there is one.
  3199. L_AES_GCM_decrypt_aesni_last_block_done:
  ; ecx = length mod 16; edx keeps the full length (r9d) as the byte loop's end index.
  ; A zero remainder skips the partial-block handling.
  3200. mov ecx, r9d
  3201. mov edx, ecx
  3202. and ecx, 15
  3203. jz L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_done
  ; Encrypt the current counter block into xmm4 to get the keystream for the partial block.
  3204. movdqu xmm4, [rsp+128]
  3205. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  3206. pxor xmm4, [r15]
  3207. aesenc xmm4, [r15+16]
  3208. aesenc xmm4, [r15+32]
  3209. aesenc xmm4, [r15+48]
  3210. aesenc xmm4, [r15+64]
  3211. aesenc xmm4, [r15+80]
  3212. aesenc xmm4, [r15+96]
  3213. aesenc xmm4, [r15+112]
  3214. aesenc xmm4, [r15+128]
  3215. aesenc xmm4, [r15+144]
  ; Extra rounds run only when r10d >= 11 (keys at +160/+176) and r10d >= 13 (keys at +192/+208).
  3216. cmp r10d, 11
  3217. movdqa xmm9, OWORD PTR [r15+160]
  3218. jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last
  3219. aesenc xmm4, xmm9
  3220. aesenc xmm4, [r15+176]
  3221. cmp r10d, 13
  3222. movdqa xmm9, OWORD PTR [r15+192]
  3223. jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last
  3224. aesenc xmm4, xmm9
  3225. aesenc xmm4, [r15+208]
  3226. movdqa xmm9, OWORD PTR [r15+224]
  3227. L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last:
  3228. aesenclast xmm4, xmm9
  ; Temporary 32-byte stack area: [rsp] holds the keystream block, [rsp+16] a zeroed buffer
  ; that will collect the input bytes. ecx restarts at 0 as the index into both.
  3229. sub rsp, 32
  3230. xor ecx, ecx
  3231. movdqu [rsp], xmm4
  3232. pxor xmm0, xmm0
  3233. movdqu [rsp+16], xmm0
  ; Byte loop for the trailing partial block: copy each input byte into the zero-padded buffer
  ; at rsp+16 (for hashing), XOR it with the keystream byte at rsp, and write the result to the
  ; output. Runs until ebx reaches the total length in edx.
  3234. L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_loop:
  3235. movzx r13d, BYTE PTR [rdi+rbx]
  3236. mov BYTE PTR [rsp+rcx+16], r13b
  3237. xor r13b, BYTE PTR [rsp+rcx]
  3238. mov BYTE PTR [rsi+rbx], r13b
  3239. inc ebx
  3240. inc ecx
  3241. cmp ebx, edx
  3242. jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_loop
  ; Load the zero-padded input block, release the stack area, and XOR the byte-reversed block
  ; into the running hash in xmm6.
  3243. movdqu xmm4, [rsp+16]
  3244. add rsp, 32
  3245. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  3246. pxor xmm6, xmm4
  ; Carry-less multiply of xmm6 by xmm5 with three pclmulqdq (high halves, low halves,
  ; XOR-of-halves); the 256-bit product ends up as xmm8 (low) : xmm6 (high).
  3247. pshufd xmm9, xmm5, 78
  3248. pshufd xmm10, xmm6, 78
  3249. movdqa xmm11, xmm6
  3250. movdqa xmm8, xmm6
  3251. pclmulqdq xmm11, xmm5, 17
  3252. pclmulqdq xmm8, xmm5, 0
  3253. pxor xmm9, xmm5
  3254. pxor xmm10, xmm6
  3255. pclmulqdq xmm9, xmm10, 0
  3256. pxor xmm9, xmm8
  3257. pxor xmm9, xmm11
  3258. movdqa xmm10, xmm9
  3259. movdqa xmm6, xmm11
  3260. pslldq xmm10, 8
  3261. psrldq xmm9, 8
  3262. pxor xmm8, xmm10
  3263. pxor xmm6, xmm9
  ; Reduce to 128 bits: fold xmm8 into xmm6 using left shifts by 31/30/25 and right shifts by 1/2/7.
  3264. movdqa xmm12, xmm8
  3265. movdqa xmm13, xmm8
  3266. movdqa xmm14, xmm8
  3267. pslld xmm12, 31
  3268. pslld xmm13, 30
  3269. pslld xmm14, 25
  3270. pxor xmm12, xmm13
  3271. pxor xmm12, xmm14
  3272. movdqa xmm13, xmm12
  3273. psrldq xmm13, 4
  3274. pslldq xmm12, 12
  3275. pxor xmm8, xmm12
  3276. movdqa xmm14, xmm8
  3277. movdqa xmm10, xmm8
  3278. movdqa xmm9, xmm8
  3279. psrld xmm14, 1
  3280. psrld xmm10, 2
  3281. psrld xmm9, 7
  3282. pxor xmm14, xmm10
  3283. pxor xmm14, xmm9
  3284. pxor xmm14, xmm13
  3285. pxor xmm14, xmm8
  3286. pxor xmm6, xmm14
  3287. L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_done:
  3288. L_AES_GCM_decrypt_aesni_done_dec:
  3289. mov edx, r9d
  3290. mov ecx, r11d
  3291. shl rdx, 3
  3292. shl rcx, 3
  3293. pinsrq xmm0, rdx, 0
  3294. pinsrq xmm0, rcx, 1
  3295. pxor xmm6, xmm0
  3296. pshufd xmm9, xmm5, 78
  3297. pshufd xmm10, xmm6, 78
  3298. movdqa xmm11, xmm6
  3299. movdqa xmm8, xmm6
  3300. pclmulqdq xmm11, xmm5, 17
  3301. pclmulqdq xmm8, xmm5, 0
  3302. pxor xmm9, xmm5
  3303. pxor xmm10, xmm6
  3304. pclmulqdq xmm9, xmm10, 0
  3305. pxor xmm9, xmm8
  3306. pxor xmm9, xmm11
  3307. movdqa xmm10, xmm9
  3308. movdqa xmm6, xmm11
  3309. pslldq xmm10, 8
  3310. psrldq xmm9, 8
  3311. pxor xmm8, xmm10
  3312. pxor xmm6, xmm9
  3313. movdqa xmm12, xmm8
  3314. movdqa xmm13, xmm8
  3315. movdqa xmm14, xmm8
  3316. pslld xmm12, 31
  3317. pslld xmm13, 30
  3318. pslld xmm14, 25
  3319. pxor xmm12, xmm13
  3320. pxor xmm12, xmm14
  3321. movdqa xmm13, xmm12
  3322. psrldq xmm13, 4
  3323. pslldq xmm12, 12
  3324. pxor xmm8, xmm12
  3325. movdqa xmm14, xmm8
  3326. movdqa xmm10, xmm8
  3327. movdqa xmm9, xmm8
  3328. psrld xmm14, 1
  3329. psrld xmm10, 2
  3330. psrld xmm9, 7
  3331. pxor xmm14, xmm10
  3332. pxor xmm14, xmm9
  3333. pxor xmm14, xmm13
  3334. pxor xmm14, xmm8
  3335. pxor xmm6, xmm14
  3336. pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
  3337. movdqu xmm0, [rsp+144]
  3338. pxor xmm0, xmm6
  3339. cmp r14d, 16
  3340. je L_AES_GCM_decrypt_aesni_cmp_tag_16
  3341. sub rsp, 16
  3342. xor rcx, rcx
  3343. xor rbx, rbx
  3344. movdqu [rsp], xmm0
  3345. L_AES_GCM_decrypt_aesni_cmp_tag_loop:
  3346. movzx r13d, BYTE PTR [rsp+rcx]
  3347. xor r13b, BYTE PTR [r8+rcx]
  3348. or bl, r13b
  3349. inc ecx
  3350. cmp ecx, r14d
  3351. jne L_AES_GCM_decrypt_aesni_cmp_tag_loop
  3352. cmp rbx, 0
  3353. sete bl
  3354. add rsp, 16
  3355. xor rcx, rcx
  3356. jmp L_AES_GCM_decrypt_aesni_cmp_tag_done
  3357. L_AES_GCM_decrypt_aesni_cmp_tag_16:
  3358. movdqu xmm1, [r8]
  3359. pcmpeqb xmm0, xmm1
  3360. pmovmskb rdx, xmm0
  3361. ; %%edx == 0xFFFF then return 1 else => return 0
  3362. xor ebx, ebx
  3363. cmp edx, 65535
  3364. sete bl
  3365. L_AES_GCM_decrypt_aesni_cmp_tag_done:
  3366. mov DWORD PTR [rbp], ebx
  3367. movdqu xmm6, [rsp+168]
  3368. movdqu xmm7, [rsp+184]
  3369. movdqu xmm8, [rsp+200]
  3370. movdqu xmm9, [rsp+216]
  3371. movdqu xmm10, [rsp+232]
  3372. movdqu xmm11, [rsp+248]
  3373. movdqu xmm12, [rsp+264]
  3374. movdqu xmm13, [rsp+280]
  3375. movdqu xmm14, [rsp+296]
  3376. movdqu xmm15, [rsp+312]
  3377. add rsp, 328
  3378. pop rbp
  3379. pop r15
  3380. pop r14
  3381. pop rbx
  3382. pop r12
  3383. pop rsi
  3384. pop rdi
  3385. pop r13
  3386. ret
  3387. AES_GCM_decrypt_aesni ENDP
  3388. _text ENDS
  3389. _text SEGMENT READONLY PARA
  3390. AES_GCM_init_aesni PROC
  ; ---------------------------------------------------------------------
  ; AES_GCM_init_aesni
  ; Purpose: from an expanded AES key and an IV, compute the GHASH key H,
  ;          the starting counter block, and the encrypted initial
  ;          counter block.
  ; ABI:     Microsoft x64 (args in rcx, rdx, r8, r9, then stack).
  ; In:      rcx   = round-key schedule, read 16 bytes at a time with
  ;                  movdqa / aesenc memory operands (must be 16-byte
  ;                  aligned)
  ;          edx   = round count; < 11 uses 10 rounds, < 13 uses 12,
  ;                  otherwise 14
  ;          r8    = IV bytes
  ;          r9d   = IV length in bytes
  ;          [rsp+40] on entry (read as [rsp+80] after five pushes)
  ;                = destination for H (written with movdqa)
  ;          [rsp+48] on entry = destination for the counter block
  ;                  (written with movdqa)
  ;          [rsp+56] on entry = destination for the encrypted initial
  ;                  counter block (written with movdqa)
  ; Saved:   rdi, rsi, r12, r13, r14 and xmm6, xmm7, xmm8, xmm15 are
  ;          preserved. r14 is pushed/popped but not otherwise referenced
  ;          in this procedure.
  ; Scratch: rax, rcx, rdx, r8-r11, xmm0-xmm5, flags.
  ; NOTE(review): length comparisons below use signed jumps (jl), so an
  ;          IV length with bit 31 set would be treated as negative --
  ;          presumably callers never pass such lengths; confirm.
  ; ---------------------------------------------------------------------
  3391. push rdi
  3392. push rsi
  3393. push r12
  3394. push r13
  3395. push r14
  ; Move the register arguments into the roles used by the body:
  ; rdi = key schedule, esi = rounds, r10 = IV pointer, r11d = IV length.
  3396. mov rdi, rcx
  3397. mov rsi, rdx
  3398. mov r10, r8
  3399. mov r11d, r9d
  ; Stack arguments: 40 bytes of pushes + 8-byte return address + 32-byte
  ; shadow space puts the fifth argument at [rsp+80].
  3400. mov rax, QWORD PTR [rsp+80]
  3401. mov r8, QWORD PTR [rsp+88]
  3402. mov r9, QWORD PTR [rsp+96]
  ; Local area: xmm6/7/8/15 are callee-saved under the Microsoft x64 ABI.
  3403. sub rsp, 80
  3404. movdqu [rsp+16], xmm6
  3405. movdqu [rsp+32], xmm7
  3406. movdqu [rsp+48], xmm8
  3407. movdqu [rsp+64], xmm15
  ; xmm4 = 0: becomes the counter block (12-byte IV) or the GHASH
  ; accumulator (any other IV length).
  3408. pxor xmm4, xmm4
  3409. mov edx, r11d
  3410. cmp edx, 12
  3411. jne L_AES_GCM_init_aesni_iv_not_12
  3412. ; # Calculate values when IV is 12 bytes
  3413. ; Set counter based on IV
  ; 16777216 = 0x01000000; stored little-endian in dword 3 it gives the
  ; bytes 00 00 00 01, so xmm4 = IV[0..11] || 00 00 00 01.
  3414. mov ecx, 16777216
  3415. pinsrq xmm4, QWORD PTR [r10], 0
  3416. pinsrd xmm4, DWORD PTR [r10+8], 2
  3417. pinsrd xmm4, ecx, 3
  3418. ; H = Encrypt X(=0) and T = Encrypt counter
  ; Two blocks are encrypted in lock-step with each round key in xmm6.
  ; xmm5 starts as round key 0 itself because the block being encrypted
  ; is all zeros (0 XOR key0 = key0); xmm1 = counter block XOR key0.
  3419. movdqa xmm1, xmm4
  3420. movdqa xmm5, OWORD PTR [rdi]
  3421. pxor xmm1, xmm5
  3422. movdqa xmm6, OWORD PTR [rdi+16]
  3423. aesenc xmm5, xmm6
  3424. aesenc xmm1, xmm6
  3425. movdqa xmm6, OWORD PTR [rdi+32]
  3426. aesenc xmm5, xmm6
  3427. aesenc xmm1, xmm6
  3428. movdqa xmm6, OWORD PTR [rdi+48]
  3429. aesenc xmm5, xmm6
  3430. aesenc xmm1, xmm6
  3431. movdqa xmm6, OWORD PTR [rdi+64]
  3432. aesenc xmm5, xmm6
  3433. aesenc xmm1, xmm6
  3434. movdqa xmm6, OWORD PTR [rdi+80]
  3435. aesenc xmm5, xmm6
  3436. aesenc xmm1, xmm6
  3437. movdqa xmm6, OWORD PTR [rdi+96]
  3438. aesenc xmm5, xmm6
  3439. aesenc xmm1, xmm6
  3440. movdqa xmm6, OWORD PTR [rdi+112]
  3441. aesenc xmm5, xmm6
  3442. aesenc xmm1, xmm6
  3443. movdqa xmm6, OWORD PTR [rdi+128]
  3444. aesenc xmm5, xmm6
  3445. aesenc xmm1, xmm6
  3446. movdqa xmm6, OWORD PTR [rdi+144]
  3447. aesenc xmm5, xmm6
  3448. aesenc xmm1, xmm6
  ; Round-count dispatch: the movdqa between cmp and jl does not modify
  ; flags, so xmm6 already holds the final round key when the jump is
  ; taken (offset 160 for < 11 rounds, 192 for < 13, 224 otherwise).
  3449. cmp esi, 11
  3450. movdqa xmm6, OWORD PTR [rdi+160]
  3451. jl L_AES_GCM_init_aesni_calc_iv_12_last
  3452. aesenc xmm5, xmm6
  3453. aesenc xmm1, xmm6
  3454. movdqa xmm6, OWORD PTR [rdi+176]
  3455. aesenc xmm5, xmm6
  3456. aesenc xmm1, xmm6
  3457. cmp esi, 13
  3458. movdqa xmm6, OWORD PTR [rdi+192]
  3459. jl L_AES_GCM_init_aesni_calc_iv_12_last
  3460. aesenc xmm5, xmm6
  3461. aesenc xmm1, xmm6
  3462. movdqa xmm6, OWORD PTR [rdi+208]
  3463. aesenc xmm5, xmm6
  3464. aesenc xmm1, xmm6
  3465. movdqa xmm6, OWORD PTR [rdi+224]
  3466. L_AES_GCM_init_aesni_calc_iv_12_last:
  3467. aesenclast xmm5, xmm6
  3468. aesenclast xmm1, xmm6
  ; xmm5 = H, byte-shuffled with L_aes_gcm_bswap_mask (mask defined
  ; elsewhere; presumably a full 16-byte reversal -- confirm).
  ; xmm15 = encrypted initial counter block.
  3469. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  3470. movdqu xmm15, xmm1
  3471. jmp L_AES_GCM_init_aesni_iv_done
  3472. L_AES_GCM_init_aesni_iv_not_12:
  3473. ; Calculate values when IV is not 12 bytes
  3474. ; H = Encrypt X(=0)
  ; As above, encrypting an all-zero block starts from round key 0.
  3475. movdqa xmm5, OWORD PTR [rdi]
  3476. aesenc xmm5, [rdi+16]
  3477. aesenc xmm5, [rdi+32]
  3478. aesenc xmm5, [rdi+48]
  3479. aesenc xmm5, [rdi+64]
  3480. aesenc xmm5, [rdi+80]
  3481. aesenc xmm5, [rdi+96]
  3482. aesenc xmm5, [rdi+112]
  3483. aesenc xmm5, [rdi+128]
  3484. aesenc xmm5, [rdi+144]
  3485. cmp esi, 11
  3486. movdqa xmm8, OWORD PTR [rdi+160]
  3487. jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last
  3488. aesenc xmm5, xmm8
  3489. aesenc xmm5, [rdi+176]
  3490. cmp esi, 13
  3491. movdqa xmm8, OWORD PTR [rdi+192]
  3492. jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last
  3493. aesenc xmm5, xmm8
  3494. aesenc xmm5, [rdi+208]
  3495. movdqa xmm8, OWORD PTR [rdi+224]
  3496. L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last:
  3497. aesenclast xmm5, xmm8
  3498. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  3499. ; Calc counter
  3500. ; Initialization vector
  ; rcx is zeroed with mov (not xor) on purpose: mov leaves the flags
  ; from "cmp edx, 0" intact for the je that follows.
  3501. cmp edx, 0
  3502. mov rcx, 0
  3503. je L_AES_GCM_init_aesni_calc_iv_done
  3504. cmp edx, 16
  3505. jl L_AES_GCM_init_aesni_calc_iv_lt16
  ; 4294967280 = 0xFFFFFFF0: edx = IV length rounded down to a multiple
  ; of 16, i.e. the number of bytes consumed by the whole-block loop.
  3506. and edx, 4294967280
  3507. L_AES_GCM_init_aesni_calc_iv_16_loop:
  ; GHASH step: xmm4 = (xmm4 XOR shuffled IV block) * H (xmm5).
  3508. movdqu xmm7, [r10+rcx]
  3509. pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
  3510. pxor xmm4, xmm7
  ; Karatsuba carry-less multiply: imm 17 = high*high (xmm3), imm 0 =
  ; low*low (xmm0), and (hi^lo)*(hi^lo) for the middle term (xmm1).
  3511. pshufd xmm1, xmm4, 78
  3512. pshufd xmm2, xmm5, 78
  3513. movdqa xmm3, xmm5
  3514. movdqa xmm0, xmm5
  3515. pclmulqdq xmm3, xmm4, 17
  3516. pclmulqdq xmm0, xmm4, 0
  3517. pxor xmm1, xmm4
  3518. pxor xmm2, xmm5
  3519. pclmulqdq xmm1, xmm2, 0
  3520. pxor xmm1, xmm0
  3521. pxor xmm1, xmm3
  ; Assemble the 256-bit product: xmm6 = low 128 bits, xmm4 = high 128.
  3522. movdqa xmm2, xmm1
  3523. movdqa xmm6, xmm0
  3524. movdqa xmm4, xmm3
  3525. pslldq xmm2, 8
  3526. psrldq xmm1, 8
  3527. pxor xmm6, xmm2
  3528. pxor xmm4, xmm1
  ; Shift the 256-bit product left by one bit, carrying the top bit of
  ; each dword into the next dword and from xmm6 into xmm4.
  3529. movdqa xmm0, xmm6
  3530. movdqa xmm1, xmm4
  3531. psrld xmm0, 31
  3532. psrld xmm1, 31
  3533. pslld xmm6, 1
  3534. pslld xmm4, 1
  3535. movdqa xmm2, xmm0
  3536. pslldq xmm0, 4
  3537. psrldq xmm2, 12
  3538. pslldq xmm1, 4
  3539. por xmm4, xmm2
  3540. por xmm6, xmm0
  3541. por xmm4, xmm1
  ; Reduction: fold the low half (xmm6) into the high half (xmm4).
  ; NOTE(review): the shift counts 31/30/25 and 1/2/7 look like the
  ; usual reduction by the GCM polynomial x^128+x^7+x^2+x+1 -- confirm
  ; against a reference GHASH.
  3542. movdqa xmm0, xmm6
  3543. movdqa xmm1, xmm6
  3544. movdqa xmm2, xmm6
  3545. pslld xmm0, 31
  3546. pslld xmm1, 30
  3547. pslld xmm2, 25
  3548. pxor xmm0, xmm1
  3549. pxor xmm0, xmm2
  3550. movdqa xmm1, xmm0
  3551. psrldq xmm1, 4
  3552. pslldq xmm0, 12
  3553. pxor xmm6, xmm0
  3554. movdqa xmm2, xmm6
  3555. movdqa xmm3, xmm6
  3556. movdqa xmm0, xmm6
  3557. psrld xmm2, 1
  3558. psrld xmm3, 2
  3559. psrld xmm0, 7
  3560. pxor xmm2, xmm3
  3561. pxor xmm2, xmm0
  3562. pxor xmm2, xmm1
  3563. pxor xmm2, xmm6
  3564. pxor xmm4, xmm2
  3565. add ecx, 16
  3566. cmp ecx, edx
  3567. jl L_AES_GCM_init_aesni_calc_iv_16_loop
  ; Restore the full IV length; if it was a multiple of 16 there is no
  ; partial block left to hash.
  3568. mov edx, r11d
  3569. cmp ecx, edx
  3570. je L_AES_GCM_init_aesni_calc_iv_done
  3571. L_AES_GCM_init_aesni_calc_iv_lt16:
  ; Copy the remaining (edx - ecx) IV bytes into a zeroed 16-byte stack
  ; buffer so the final partial block is zero-padded.
  3572. sub rsp, 16
  3573. pxor xmm7, xmm7
  3574. xor r13d, r13d
  3575. movdqu [rsp], xmm7
  3576. L_AES_GCM_init_aesni_calc_iv_loop:
  3577. movzx r12d, BYTE PTR [r10+rcx]
  3578. mov BYTE PTR [rsp+r13], r12b
  3579. inc ecx
  3580. inc r13d
  3581. cmp ecx, edx
  3582. jl L_AES_GCM_init_aesni_calc_iv_loop
  3583. movdqu xmm7, [rsp]
  3584. add rsp, 16
  ; GHASH step on the padded block (same multiply/shift/reduce sequence
  ; as in the 16-byte loop above).
  3585. pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
  3586. pxor xmm4, xmm7
  3587. pshufd xmm1, xmm4, 78
  3588. pshufd xmm2, xmm5, 78
  3589. movdqa xmm3, xmm5
  3590. movdqa xmm0, xmm5
  3591. pclmulqdq xmm3, xmm4, 17
  3592. pclmulqdq xmm0, xmm4, 0
  3593. pxor xmm1, xmm4
  3594. pxor xmm2, xmm5
  3595. pclmulqdq xmm1, xmm2, 0
  3596. pxor xmm1, xmm0
  3597. pxor xmm1, xmm3
  3598. movdqa xmm2, xmm1
  3599. movdqa xmm6, xmm0
  3600. movdqa xmm4, xmm3
  3601. pslldq xmm2, 8
  3602. psrldq xmm1, 8
  3603. pxor xmm6, xmm2
  3604. pxor xmm4, xmm1
  3605. movdqa xmm0, xmm6
  3606. movdqa xmm1, xmm4
  3607. psrld xmm0, 31
  3608. psrld xmm1, 31
  3609. pslld xmm6, 1
  3610. pslld xmm4, 1
  3611. movdqa xmm2, xmm0
  3612. pslldq xmm0, 4
  3613. psrldq xmm2, 12
  3614. pslldq xmm1, 4
  3615. por xmm4, xmm2
  3616. por xmm6, xmm0
  3617. por xmm4, xmm1
  3618. movdqa xmm0, xmm6
  3619. movdqa xmm1, xmm6
  3620. movdqa xmm2, xmm6
  3621. pslld xmm0, 31
  3622. pslld xmm1, 30
  3623. pslld xmm2, 25
  3624. pxor xmm0, xmm1
  3625. pxor xmm0, xmm2
  3626. movdqa xmm1, xmm0
  3627. psrldq xmm1, 4
  3628. pslldq xmm0, 12
  3629. pxor xmm6, xmm0
  3630. movdqa xmm2, xmm6
  3631. movdqa xmm3, xmm6
  3632. movdqa xmm0, xmm6
  3633. psrld xmm2, 1
  3634. psrld xmm3, 2
  3635. psrld xmm0, 7
  3636. pxor xmm2, xmm3
  3637. pxor xmm2, xmm0
  3638. pxor xmm2, xmm1
  3639. pxor xmm2, xmm6
  3640. pxor xmm4, xmm2
  3641. L_AES_GCM_init_aesni_calc_iv_done:
  3642. ; T = Encrypt counter
  ; Final GHASH block: the IV length in bits (edx * 8) in the low qword,
  ; zero in the high qword. The 32-bit shl zero-extends into rdx.
  ; NOTE(review): the 32-bit shift drops high bits for IV lengths of
  ; 2^29 bytes or more -- presumably never reached; confirm.
  3643. pxor xmm0, xmm0
  3644. shl edx, 3
  3645. pinsrq xmm0, rdx, 0
  3646. pxor xmm4, xmm0
  ; Same multiply/shift/reduce sequence as above: xmm4 = xmm4 * H.
  3647. pshufd xmm1, xmm4, 78
  3648. pshufd xmm2, xmm5, 78
  3649. movdqa xmm3, xmm5
  3650. movdqa xmm0, xmm5
  3651. pclmulqdq xmm3, xmm4, 17
  3652. pclmulqdq xmm0, xmm4, 0
  3653. pxor xmm1, xmm4
  3654. pxor xmm2, xmm5
  3655. pclmulqdq xmm1, xmm2, 0
  3656. pxor xmm1, xmm0
  3657. pxor xmm1, xmm3
  3658. movdqa xmm2, xmm1
  3659. movdqa xmm6, xmm0
  3660. movdqa xmm4, xmm3
  3661. pslldq xmm2, 8
  3662. psrldq xmm1, 8
  3663. pxor xmm6, xmm2
  3664. pxor xmm4, xmm1
  3665. movdqa xmm0, xmm6
  3666. movdqa xmm1, xmm4
  3667. psrld xmm0, 31
  3668. psrld xmm1, 31
  3669. pslld xmm6, 1
  3670. pslld xmm4, 1
  3671. movdqa xmm2, xmm0
  3672. pslldq xmm0, 4
  3673. psrldq xmm2, 12
  3674. pslldq xmm1, 4
  3675. por xmm4, xmm2
  3676. por xmm6, xmm0
  3677. por xmm4, xmm1
  3678. movdqa xmm0, xmm6
  3679. movdqa xmm1, xmm6
  3680. movdqa xmm2, xmm6
  3681. pslld xmm0, 31
  3682. pslld xmm1, 30
  3683. pslld xmm2, 25
  3684. pxor xmm0, xmm1
  3685. pxor xmm0, xmm2
  3686. movdqa xmm1, xmm0
  3687. psrldq xmm1, 4
  3688. pslldq xmm0, 12
  3689. pxor xmm6, xmm0
  3690. movdqa xmm2, xmm6
  3691. movdqa xmm3, xmm6
  3692. movdqa xmm0, xmm6
  3693. psrld xmm2, 1
  3694. psrld xmm3, 2
  3695. psrld xmm0, 7
  3696. pxor xmm2, xmm3
  3697. pxor xmm2, xmm0
  3698. pxor xmm2, xmm1
  3699. pxor xmm2, xmm6
  3700. pxor xmm4, xmm2
  ; Shuffle the GHASH result back; xmm4 is now the initial counter block.
  3701. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  3702. ; Encrypt counter
  3703. movdqa xmm7, OWORD PTR [rdi]
  3704. pxor xmm7, xmm4
  3705. aesenc xmm7, [rdi+16]
  3706. aesenc xmm7, [rdi+32]
  3707. aesenc xmm7, [rdi+48]
  3708. aesenc xmm7, [rdi+64]
  3709. aesenc xmm7, [rdi+80]
  3710. aesenc xmm7, [rdi+96]
  3711. aesenc xmm7, [rdi+112]
  3712. aesenc xmm7, [rdi+128]
  3713. aesenc xmm7, [rdi+144]
  3714. cmp esi, 11
  3715. movdqa xmm8, OWORD PTR [rdi+160]
  3716. jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last
  3717. aesenc xmm7, xmm8
  3718. aesenc xmm7, [rdi+176]
  3719. cmp esi, 13
  3720. movdqa xmm8, OWORD PTR [rdi+192]
  3721. jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last
  3722. aesenc xmm7, xmm8
  3723. aesenc xmm7, [rdi+208]
  3724. movdqa xmm8, OWORD PTR [rdi+224]
  3725. L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last:
  3726. aesenclast xmm7, xmm8
  3727. movdqu xmm15, xmm7
  3728. L_AES_GCM_init_aesni_iv_done:
  ; Both paths arrive here with xmm15 = encrypted initial counter block,
  ; xmm5 = H, xmm4 = initial counter block.
  ; Outputs: [r9] = xmm15, [rax] = H, [r8] = counter block after a
  ; pshufb with L_aes_gcm_bswap_epi64 and a paddd with L_aes_gcm_one
  ; (both constants defined elsewhere; presumably a per-qword byte swap
  ; followed by a +1 on the counter dword -- confirm).
  ; All three stores use movdqa, so the destinations must be 16-byte
  ; aligned.
  3729. movdqa OWORD PTR [r9], xmm15
  3730. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  3731. paddd xmm4, OWORD PTR L_aes_gcm_one
  3732. movdqa OWORD PTR [rax], xmm5
  3733. movdqa OWORD PTR [r8], xmm4
  ; Epilogue: restore callee-saved xmm and integer registers.
  3734. movdqu xmm6, [rsp+16]
  3735. movdqu xmm7, [rsp+32]
  3736. movdqu xmm8, [rsp+48]
  3737. movdqu xmm15, [rsp+64]
  3738. add rsp, 80
  3739. pop r14
  3740. pop r13
  3741. pop r12
  3742. pop rsi
  3743. pop rdi
  3744. ret
  3745. AES_GCM_init_aesni ENDP
  3746. _text ENDS
  3747. _text SEGMENT READONLY PARA
  3748. AES_GCM_aad_update_aesni PROC
  ; ---------------------------------------------------------------------
  ; AES_GCM_aad_update_aesni
  ; Purpose: fold a run of 16-byte blocks into a running GHASH state.
  ; ABI:     Microsoft x64.
  ; In:      rcx = data to hash (read 16 bytes at a time with movdqu)
  ;          edx = byte count
  ;          r8  = running hash state, 16 bytes, loaded and stored with
  ;                movdqa (must be 16-byte aligned); updated in place
  ;          r9  = hash key H, 16 bytes, loaded with movdqa
  ; Saved:   xmm6, xmm7 (callee-saved under this ABI).
  ; Scratch: rax, rcx, xmm0-xmm5, flags.
  ; NOTE:    the loop is bottom-tested, so one 16-byte block is always
  ;          processed even when edx <= 0, and data is consumed in whole
  ;          16-byte steps. Presumably callers pass a non-zero multiple
  ;          of 16 -- confirm. The loop test is a signed compare (jl).
  ; ---------------------------------------------------------------------
  ; rax = data pointer, freeing rcx for use as the byte index.
  3749. mov rax, rcx
  3750. sub rsp, 32
  3751. movdqu [rsp], xmm6
  3752. movdqu [rsp+16], xmm7
  ; xmm5 = running hash state, xmm6 = H.
  3753. movdqa xmm5, OWORD PTR [r8]
  3754. movdqa xmm6, OWORD PTR [r9]
  3755. xor ecx, ecx
  3756. L_AES_GCM_aad_update_aesni_16_loop:
  ; GHASH step: xmm5 = (xmm5 XOR shuffled data block) * H.
  3757. movdqu xmm7, [rax+rcx]
  3758. pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
  3759. pxor xmm5, xmm7
  ; Karatsuba carry-less multiply: imm 17 = high*high (xmm3), imm 0 =
  ; low*low (xmm0), (hi^lo)*(hi^lo) gives the middle term (xmm1).
  3760. pshufd xmm1, xmm5, 78
  3761. pshufd xmm2, xmm6, 78
  3762. movdqa xmm3, xmm6
  3763. movdqa xmm0, xmm6
  3764. pclmulqdq xmm3, xmm5, 17
  3765. pclmulqdq xmm0, xmm5, 0
  3766. pxor xmm1, xmm5
  3767. pxor xmm2, xmm6
  3768. pclmulqdq xmm1, xmm2, 0
  3769. pxor xmm1, xmm0
  3770. pxor xmm1, xmm3
  ; Assemble the 256-bit product: xmm4 = low 128 bits, xmm5 = high 128.
  3771. movdqa xmm2, xmm1
  3772. movdqa xmm4, xmm0
  3773. movdqa xmm5, xmm3
  3774. pslldq xmm2, 8
  3775. psrldq xmm1, 8
  3776. pxor xmm4, xmm2
  3777. pxor xmm5, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords
  ; and from xmm4 into xmm5.
  3778. movdqa xmm0, xmm4
  3779. movdqa xmm1, xmm5
  3780. psrld xmm0, 31
  3781. psrld xmm1, 31
  3782. pslld xmm4, 1
  3783. pslld xmm5, 1
  3784. movdqa xmm2, xmm0
  3785. pslldq xmm0, 4
  3786. psrldq xmm2, 12
  3787. pslldq xmm1, 4
  3788. por xmm5, xmm2
  3789. por xmm4, xmm0
  3790. por xmm5, xmm1
  ; Reduction: fold the low half (xmm4) into the high half (xmm5).
  ; NOTE(review): shift counts 31/30/25 and 1/2/7 look like the usual
  ; reduction by the GCM polynomial -- confirm against a reference.
  3791. movdqa xmm0, xmm4
  3792. movdqa xmm1, xmm4
  3793. movdqa xmm2, xmm4
  3794. pslld xmm0, 31
  3795. pslld xmm1, 30
  3796. pslld xmm2, 25
  3797. pxor xmm0, xmm1
  3798. pxor xmm0, xmm2
  3799. movdqa xmm1, xmm0
  3800. psrldq xmm1, 4
  3801. pslldq xmm0, 12
  3802. pxor xmm4, xmm0
  3803. movdqa xmm2, xmm4
  3804. movdqa xmm3, xmm4
  3805. movdqa xmm0, xmm4
  3806. psrld xmm2, 1
  3807. psrld xmm3, 2
  3808. psrld xmm0, 7
  3809. pxor xmm2, xmm3
  3810. pxor xmm2, xmm0
  3811. pxor xmm2, xmm1
  3812. pxor xmm2, xmm4
  3813. pxor xmm5, xmm2
  3814. add ecx, 16
  3815. cmp ecx, edx
  3816. jl L_AES_GCM_aad_update_aesni_16_loop
  ; Write the updated hash state back and restore xmm6/xmm7.
  3817. movdqa OWORD PTR [r8], xmm5
  3818. movdqu xmm6, [rsp]
  3819. movdqu xmm7, [rsp+16]
  3820. add rsp, 32
  3821. ret
  3822. AES_GCM_aad_update_aesni ENDP
  3823. _text ENDS
  3824. _text SEGMENT READONLY PARA
  3825. AES_GCM_encrypt_block_aesni PROC
  ; ---------------------------------------------------------------------
  ; AES_GCM_encrypt_block_aesni
  ; Purpose: counter-mode encrypt one 16-byte block and advance the
  ;          stored counter.
  ; ABI:     Microsoft x64.
  ; In:      rcx = round-key schedule (read with 16-byte memory operands;
  ;                must be 16-byte aligned)
  ;          edx = round count; < 11 uses 10 rounds, < 13 uses 12,
  ;                otherwise 14
  ;          r8  = output, 16 bytes written (movdqu)
  ;          r9  = input, 16 bytes read (movdqu)
  ;          [rsp+40] = pointer to the 16-byte counter block (no pushes
  ;                here, so return address + 32-byte shadow space put the
  ;                fifth argument at rsp+40); updated in place
  ; Scratch: rax, r10, r11, xmm0, xmm1, flags. No callee-saved register
  ;          is modified.
  ; ---------------------------------------------------------------------
  3826. mov r10, r8
  3827. mov r11, r9
  3828. mov rax, QWORD PTR [rsp+40]
  ; xmm0 = current counter shuffled for encryption; xmm1 = counter with
  ; L_aes_gcm_one added, stored back for the next call. The stored
  ; counter is kept in the un-shuffled form (the shuffle is applied only
  ; to the copy being encrypted).
  3829. movdqu xmm0, [rax]
  3830. movdqa xmm1, xmm0
  3831. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_epi64
  3832. paddd xmm1, OWORD PTR L_aes_gcm_one
  3833. pxor xmm0, [rcx]
  3834. movdqu [rax], xmm1
  3835. aesenc xmm0, [rcx+16]
  3836. aesenc xmm0, [rcx+32]
  3837. aesenc xmm0, [rcx+48]
  3838. aesenc xmm0, [rcx+64]
  3839. aesenc xmm0, [rcx+80]
  3840. aesenc xmm0, [rcx+96]
  3841. aesenc xmm0, [rcx+112]
  3842. aesenc xmm0, [rcx+128]
  3843. aesenc xmm0, [rcx+144]
  ; Round-count dispatch: movdqa does not modify flags, so xmm1 already
  ; holds the final round key when jl is taken.
  3844. cmp edx, 11
  3845. movdqa xmm1, OWORD PTR [rcx+160]
  3846. jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last
  3847. aesenc xmm0, xmm1
  3848. aesenc xmm0, [rcx+176]
  3849. cmp edx, 13
  3850. movdqa xmm1, OWORD PTR [rcx+192]
  3851. jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last
  3852. aesenc xmm0, xmm1
  3853. aesenc xmm0, [rcx+208]
  3854. movdqa xmm1, OWORD PTR [rcx+224]
  3855. L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last:
  3856. aesenclast xmm0, xmm1
  ; out = keystream XOR in.
  3857. movdqu xmm1, [r11]
  3858. pxor xmm0, xmm1
  3859. movdqu [r10], xmm0
  ; NOTE(review): this shuffle leaves a byte-shuffled copy of the output
  ; block in xmm0 at return; nothing in this procedure consumes it.
  ; Presumably a leftover from a shared code template -- confirm no
  ; caller relies on xmm0 before removing.
  3860. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  3861. ret
  3862. AES_GCM_encrypt_block_aesni ENDP
  3863. _text ENDS
  3864. _text SEGMENT READONLY PARA
  3865. AES_GCM_ghash_block_aesni PROC
  ; ---------------------------------------------------------------------
  ; AES_GCM_ghash_block_aesni
  ; Purpose: fold exactly one 16-byte block into a running GHASH state.
  ; ABI:     Microsoft x64.
  ; In:      rcx = data block, 16 bytes read with movdqu
  ;          rdx = running hash state, 16 bytes, loaded and stored with
  ;                movdqa (must be 16-byte aligned); updated in place
  ;          r8  = hash key H, 16 bytes, loaded with movdqa
  ; Saved:   xmm6, xmm7 (callee-saved under this ABI).
  ; Scratch: xmm0-xmm5, flags.
  ; ---------------------------------------------------------------------
  3866. sub rsp, 32
  3867. movdqu [rsp], xmm6
  3868. movdqu [rsp+16], xmm7
  ; xmm4 = running hash state, xmm5 = H, xmm7 = shuffled data block.
  3869. movdqa xmm4, OWORD PTR [rdx]
  3870. movdqa xmm5, OWORD PTR [r8]
  3871. movdqu xmm7, [rcx]
  3872. pshufb xmm7, OWORD PTR L_aes_gcm_bswap_mask
  3873. pxor xmm4, xmm7
  ; Karatsuba carry-less multiply of xmm4 by H: imm 17 = high*high
  ; (xmm3), imm 0 = low*low (xmm0), (hi^lo)*(hi^lo) for the middle term.
  3874. pshufd xmm1, xmm4, 78
  3875. pshufd xmm2, xmm5, 78
  3876. movdqa xmm3, xmm5
  3877. movdqa xmm0, xmm5
  3878. pclmulqdq xmm3, xmm4, 17
  3879. pclmulqdq xmm0, xmm4, 0
  3880. pxor xmm1, xmm4
  3881. pxor xmm2, xmm5
  3882. pclmulqdq xmm1, xmm2, 0
  3883. pxor xmm1, xmm0
  3884. pxor xmm1, xmm3
  ; Assemble the 256-bit product: xmm6 = low 128 bits, xmm4 = high 128.
  3885. movdqa xmm2, xmm1
  3886. movdqa xmm6, xmm0
  3887. movdqa xmm4, xmm3
  3888. pslldq xmm2, 8
  3889. psrldq xmm1, 8
  3890. pxor xmm6, xmm2
  3891. pxor xmm4, xmm1
  ; Shift the 256-bit product left by one bit, carrying across dwords
  ; and from xmm6 into xmm4.
  3892. movdqa xmm0, xmm6
  3893. movdqa xmm1, xmm4
  3894. psrld xmm0, 31
  3895. psrld xmm1, 31
  3896. pslld xmm6, 1
  3897. pslld xmm4, 1
  3898. movdqa xmm2, xmm0
  3899. pslldq xmm0, 4
  3900. psrldq xmm2, 12
  3901. pslldq xmm1, 4
  3902. por xmm4, xmm2
  3903. por xmm6, xmm0
  3904. por xmm4, xmm1
  ; Reduction: fold the low half (xmm6) into the high half (xmm4).
  ; NOTE(review): shift counts 31/30/25 and 1/2/7 look like the usual
  ; reduction by the GCM polynomial -- confirm against a reference.
  3905. movdqa xmm0, xmm6
  3906. movdqa xmm1, xmm6
  3907. movdqa xmm2, xmm6
  3908. pslld xmm0, 31
  3909. pslld xmm1, 30
  3910. pslld xmm2, 25
  3911. pxor xmm0, xmm1
  3912. pxor xmm0, xmm2
  3913. movdqa xmm1, xmm0
  3914. psrldq xmm1, 4
  3915. pslldq xmm0, 12
  3916. pxor xmm6, xmm0
  3917. movdqa xmm2, xmm6
  3918. movdqa xmm3, xmm6
  3919. movdqa xmm0, xmm6
  3920. psrld xmm2, 1
  3921. psrld xmm3, 2
  3922. psrld xmm0, 7
  3923. pxor xmm2, xmm3
  3924. pxor xmm2, xmm0
  3925. pxor xmm2, xmm1
  3926. pxor xmm2, xmm6
  3927. pxor xmm4, xmm2
  ; Write the updated hash state back and restore xmm6/xmm7.
  3928. movdqa OWORD PTR [rdx], xmm4
  3929. movdqu xmm6, [rsp]
  3930. movdqu xmm7, [rsp+16]
  3931. add rsp, 32
  3932. ret
  3933. AES_GCM_ghash_block_aesni ENDP
  3934. _text ENDS
  3935. _text SEGMENT READONLY PARA
  3936. AES_GCM_encrypt_update_aesni PROC
  3937. push r13
  3938. push r12
  3939. push r14
  3940. push r15
  3941. push rdi
  3942. mov rax, rcx
  3943. mov r10, r8
  3944. mov r8d, edx
  3945. mov r11, r9
  3946. mov r9d, DWORD PTR [rsp+80]
  3947. mov r12, QWORD PTR [rsp+88]
  3948. mov r14, QWORD PTR [rsp+96]
  3949. mov r15, QWORD PTR [rsp+104]
  3950. sub rsp, 320
  3951. movdqu [rsp+160], xmm6
  3952. movdqu [rsp+176], xmm7
  3953. movdqu [rsp+192], xmm8
  3954. movdqu [rsp+208], xmm9
  3955. movdqu [rsp+224], xmm10
  3956. movdqu [rsp+240], xmm11
  3957. movdqu [rsp+256], xmm12
  3958. movdqu [rsp+272], xmm13
  3959. movdqu [rsp+288], xmm14
  3960. movdqu [rsp+304], xmm15
  3961. movdqa xmm6, OWORD PTR [r12]
  3962. movdqa xmm5, OWORD PTR [r14]
  3963. movdqa xmm9, xmm5
  3964. movdqa xmm8, xmm5
  3965. psrlq xmm9, 63
  3966. psllq xmm8, 1
  3967. pslldq xmm9, 8
  3968. por xmm8, xmm9
  3969. pshufd xmm5, xmm5, 255
  3970. psrad xmm5, 31
  3971. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  3972. pxor xmm5, xmm8
  3973. xor rdi, rdi
  3974. cmp r9d, 128
  3975. mov r13d, r9d
  3976. jl L_AES_GCM_encrypt_update_aesni_done_128
  3977. and r13d, 4294967168
  3978. movdqa xmm2, xmm6
  3979. ; H ^ 1
  3980. movdqu [rsp], xmm5
  3981. ; H ^ 2
  3982. pshufd xmm9, xmm5, 78
  3983. pshufd xmm10, xmm5, 78
  3984. movdqa xmm11, xmm5
  3985. movdqa xmm8, xmm5
  3986. pclmulqdq xmm11, xmm5, 17
  3987. pclmulqdq xmm8, xmm5, 0
  3988. pxor xmm9, xmm5
  3989. pxor xmm10, xmm5
  3990. pclmulqdq xmm9, xmm10, 0
  3991. pxor xmm9, xmm8
  3992. pxor xmm9, xmm11
  3993. movdqa xmm10, xmm9
  3994. movdqa xmm0, xmm11
  3995. pslldq xmm10, 8
  3996. psrldq xmm9, 8
  3997. pxor xmm8, xmm10
  3998. pxor xmm0, xmm9
  3999. movdqa xmm12, xmm8
  4000. movdqa xmm13, xmm8
  4001. movdqa xmm14, xmm8
  4002. pslld xmm12, 31
  4003. pslld xmm13, 30
  4004. pslld xmm14, 25
  4005. pxor xmm12, xmm13
  4006. pxor xmm12, xmm14
  4007. movdqa xmm13, xmm12
  4008. psrldq xmm13, 4
  4009. pslldq xmm12, 12
  4010. pxor xmm8, xmm12
  4011. movdqa xmm14, xmm8
  4012. movdqa xmm10, xmm8
  4013. movdqa xmm9, xmm8
  4014. psrld xmm14, 1
  4015. psrld xmm10, 2
  4016. psrld xmm9, 7
  4017. pxor xmm14, xmm10
  4018. pxor xmm14, xmm9
  4019. pxor xmm14, xmm13
  4020. pxor xmm14, xmm8
  4021. pxor xmm0, xmm14
  4022. movdqu [rsp+16], xmm0
  4023. ; H ^ 3
  4024. pshufd xmm9, xmm5, 78
  4025. pshufd xmm10, xmm0, 78
  4026. movdqa xmm11, xmm0
  4027. movdqa xmm8, xmm0
  4028. pclmulqdq xmm11, xmm5, 17
  4029. pclmulqdq xmm8, xmm5, 0
  4030. pxor xmm9, xmm5
  4031. pxor xmm10, xmm0
  4032. pclmulqdq xmm9, xmm10, 0
  4033. pxor xmm9, xmm8
  4034. pxor xmm9, xmm11
  4035. movdqa xmm10, xmm9
  4036. movdqa xmm1, xmm11
  4037. pslldq xmm10, 8
  4038. psrldq xmm9, 8
  4039. pxor xmm8, xmm10
  4040. pxor xmm1, xmm9
  4041. movdqa xmm12, xmm8
  4042. movdqa xmm13, xmm8
  4043. movdqa xmm14, xmm8
  4044. pslld xmm12, 31
  4045. pslld xmm13, 30
  4046. pslld xmm14, 25
  4047. pxor xmm12, xmm13
  4048. pxor xmm12, xmm14
  4049. movdqa xmm13, xmm12
  4050. psrldq xmm13, 4
  4051. pslldq xmm12, 12
  4052. pxor xmm8, xmm12
  4053. movdqa xmm14, xmm8
  4054. movdqa xmm10, xmm8
  4055. movdqa xmm9, xmm8
  4056. psrld xmm14, 1
  4057. psrld xmm10, 2
  4058. psrld xmm9, 7
  4059. pxor xmm14, xmm10
  4060. pxor xmm14, xmm9
  4061. pxor xmm14, xmm13
  4062. pxor xmm14, xmm8
  4063. pxor xmm1, xmm14
  4064. movdqu [rsp+32], xmm1
  4065. ; H ^ 4
  4066. pshufd xmm9, xmm0, 78
  4067. pshufd xmm10, xmm0, 78
  4068. movdqa xmm11, xmm0
  4069. movdqa xmm8, xmm0
  4070. pclmulqdq xmm11, xmm0, 17
  4071. pclmulqdq xmm8, xmm0, 0
  4072. pxor xmm9, xmm0
  4073. pxor xmm10, xmm0
  4074. pclmulqdq xmm9, xmm10, 0
  4075. pxor xmm9, xmm8
  4076. pxor xmm9, xmm11
  4077. movdqa xmm10, xmm9
  4078. movdqa xmm3, xmm11
  4079. pslldq xmm10, 8
  4080. psrldq xmm9, 8
  4081. pxor xmm8, xmm10
  4082. pxor xmm3, xmm9
  4083. movdqa xmm12, xmm8
  4084. movdqa xmm13, xmm8
  4085. movdqa xmm14, xmm8
  4086. pslld xmm12, 31
  4087. pslld xmm13, 30
  4088. pslld xmm14, 25
  4089. pxor xmm12, xmm13
  4090. pxor xmm12, xmm14
  4091. movdqa xmm13, xmm12
  4092. psrldq xmm13, 4
  4093. pslldq xmm12, 12
  4094. pxor xmm8, xmm12
  4095. movdqa xmm14, xmm8
  4096. movdqa xmm10, xmm8
  4097. movdqa xmm9, xmm8
  4098. psrld xmm14, 1
  4099. psrld xmm10, 2
  4100. psrld xmm9, 7
  4101. pxor xmm14, xmm10
  4102. pxor xmm14, xmm9
  4103. pxor xmm14, xmm13
  4104. pxor xmm14, xmm8
  4105. pxor xmm3, xmm14
  4106. movdqu [rsp+48], xmm3
  4107. ; H ^ 5
  4108. pshufd xmm9, xmm0, 78
  4109. pshufd xmm10, xmm1, 78
  4110. movdqa xmm11, xmm1
  4111. movdqa xmm8, xmm1
  4112. pclmulqdq xmm11, xmm0, 17
  4113. pclmulqdq xmm8, xmm0, 0
  4114. pxor xmm9, xmm0
  4115. pxor xmm10, xmm1
  4116. pclmulqdq xmm9, xmm10, 0
  4117. pxor xmm9, xmm8
  4118. pxor xmm9, xmm11
  4119. movdqa xmm10, xmm9
  4120. movdqa xmm7, xmm11
  4121. pslldq xmm10, 8
  4122. psrldq xmm9, 8
  4123. pxor xmm8, xmm10
  4124. pxor xmm7, xmm9
  4125. movdqa xmm12, xmm8
  4126. movdqa xmm13, xmm8
  4127. movdqa xmm14, xmm8
  4128. pslld xmm12, 31
  4129. pslld xmm13, 30
  4130. pslld xmm14, 25
  4131. pxor xmm12, xmm13
  4132. pxor xmm12, xmm14
  4133. movdqa xmm13, xmm12
  4134. psrldq xmm13, 4
  4135. pslldq xmm12, 12
  4136. pxor xmm8, xmm12
  4137. movdqa xmm14, xmm8
  4138. movdqa xmm10, xmm8
  4139. movdqa xmm9, xmm8
  4140. psrld xmm14, 1
  4141. psrld xmm10, 2
  4142. psrld xmm9, 7
  4143. pxor xmm14, xmm10
  4144. pxor xmm14, xmm9
  4145. pxor xmm14, xmm13
  4146. pxor xmm14, xmm8
  4147. pxor xmm7, xmm14
  4148. movdqu [rsp+64], xmm7
  4149. ; H ^ 6
  4150. pshufd xmm9, xmm1, 78
  4151. pshufd xmm10, xmm1, 78
  4152. movdqa xmm11, xmm1
  4153. movdqa xmm8, xmm1
  4154. pclmulqdq xmm11, xmm1, 17
  4155. pclmulqdq xmm8, xmm1, 0
  4156. pxor xmm9, xmm1
  4157. pxor xmm10, xmm1
  4158. pclmulqdq xmm9, xmm10, 0
  4159. pxor xmm9, xmm8
  4160. pxor xmm9, xmm11
  4161. movdqa xmm10, xmm9
  4162. movdqa xmm7, xmm11
  4163. pslldq xmm10, 8
  4164. psrldq xmm9, 8
  4165. pxor xmm8, xmm10
  4166. pxor xmm7, xmm9
  4167. movdqa xmm12, xmm8
  4168. movdqa xmm13, xmm8
  4169. movdqa xmm14, xmm8
  4170. pslld xmm12, 31
  4171. pslld xmm13, 30
  4172. pslld xmm14, 25
  4173. pxor xmm12, xmm13
  4174. pxor xmm12, xmm14
  4175. movdqa xmm13, xmm12
  4176. psrldq xmm13, 4
  4177. pslldq xmm12, 12
  4178. pxor xmm8, xmm12
  4179. movdqa xmm14, xmm8
  4180. movdqa xmm10, xmm8
  4181. movdqa xmm9, xmm8
  4182. psrld xmm14, 1
  4183. psrld xmm10, 2
  4184. psrld xmm9, 7
  4185. pxor xmm14, xmm10
  4186. pxor xmm14, xmm9
  4187. pxor xmm14, xmm13
  4188. pxor xmm14, xmm8
  4189. pxor xmm7, xmm14
  4190. movdqu [rsp+80], xmm7
  4191. ; H ^ 7
  4192. pshufd xmm9, xmm1, 78
  4193. pshufd xmm10, xmm3, 78
  4194. movdqa xmm11, xmm3
  4195. movdqa xmm8, xmm3
  4196. pclmulqdq xmm11, xmm1, 17
  4197. pclmulqdq xmm8, xmm1, 0
  4198. pxor xmm9, xmm1
  4199. pxor xmm10, xmm3
  4200. pclmulqdq xmm9, xmm10, 0
  4201. pxor xmm9, xmm8
  4202. pxor xmm9, xmm11
  4203. movdqa xmm10, xmm9
  4204. movdqa xmm7, xmm11
  4205. pslldq xmm10, 8
  4206. psrldq xmm9, 8
  4207. pxor xmm8, xmm10
  4208. pxor xmm7, xmm9
  4209. movdqa xmm12, xmm8
  4210. movdqa xmm13, xmm8
  4211. movdqa xmm14, xmm8
  4212. pslld xmm12, 31
  4213. pslld xmm13, 30
  4214. pslld xmm14, 25
  4215. pxor xmm12, xmm13
  4216. pxor xmm12, xmm14
  4217. movdqa xmm13, xmm12
  4218. psrldq xmm13, 4
  4219. pslldq xmm12, 12
  4220. pxor xmm8, xmm12
  4221. movdqa xmm14, xmm8
  4222. movdqa xmm10, xmm8
  4223. movdqa xmm9, xmm8
  4224. psrld xmm14, 1
  4225. psrld xmm10, 2
  4226. psrld xmm9, 7
  4227. pxor xmm14, xmm10
  4228. pxor xmm14, xmm9
  4229. pxor xmm14, xmm13
  4230. pxor xmm14, xmm8
  4231. pxor xmm7, xmm14
  4232. movdqu [rsp+96], xmm7
  4233. ; H ^ 8
  4234. pshufd xmm9, xmm3, 78
  4235. pshufd xmm10, xmm3, 78
  4236. movdqa xmm11, xmm3
  4237. movdqa xmm8, xmm3
  4238. pclmulqdq xmm11, xmm3, 17
  4239. pclmulqdq xmm8, xmm3, 0
  4240. pxor xmm9, xmm3
  4241. pxor xmm10, xmm3
  4242. pclmulqdq xmm9, xmm10, 0
  4243. pxor xmm9, xmm8
  4244. pxor xmm9, xmm11
  4245. movdqa xmm10, xmm9
  4246. movdqa xmm7, xmm11
  4247. pslldq xmm10, 8
  4248. psrldq xmm9, 8
  4249. pxor xmm8, xmm10
  4250. pxor xmm7, xmm9
  4251. movdqa xmm12, xmm8
  4252. movdqa xmm13, xmm8
  4253. movdqa xmm14, xmm8
  4254. pslld xmm12, 31
  4255. pslld xmm13, 30
  4256. pslld xmm14, 25
  4257. pxor xmm12, xmm13
  4258. pxor xmm12, xmm14
  4259. movdqa xmm13, xmm12
  4260. psrldq xmm13, 4
  4261. pslldq xmm12, 12
  4262. pxor xmm8, xmm12
  4263. movdqa xmm14, xmm8
  4264. movdqa xmm10, xmm8
  4265. movdqa xmm9, xmm8
  4266. psrld xmm14, 1
  4267. psrld xmm10, 2
  4268. psrld xmm9, 7
  4269. pxor xmm14, xmm10
  4270. pxor xmm14, xmm9
  4271. pxor xmm14, xmm13
  4272. pxor xmm14, xmm8
  4273. pxor xmm7, xmm14
  4274. movdqu [rsp+112], xmm7
  4275. ; First 128 bytes of input
  4276. movdqu xmm8, [r15]
  4277. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  4278. movdqa xmm0, xmm8
  4279. pshufb xmm8, xmm1
  4280. movdqa xmm9, xmm0
  4281. paddd xmm9, OWORD PTR L_aes_gcm_one
  4282. pshufb xmm9, xmm1
  4283. movdqa xmm10, xmm0
  4284. paddd xmm10, OWORD PTR L_aes_gcm_two
  4285. pshufb xmm10, xmm1
  4286. movdqa xmm11, xmm0
  4287. paddd xmm11, OWORD PTR L_aes_gcm_three
  4288. pshufb xmm11, xmm1
  4289. movdqa xmm12, xmm0
  4290. paddd xmm12, OWORD PTR L_aes_gcm_four
  4291. pshufb xmm12, xmm1
  4292. movdqa xmm13, xmm0
  4293. paddd xmm13, OWORD PTR L_aes_gcm_five
  4294. pshufb xmm13, xmm1
  4295. movdqa xmm14, xmm0
  4296. paddd xmm14, OWORD PTR L_aes_gcm_six
  4297. pshufb xmm14, xmm1
  4298. movdqa xmm15, xmm0
  4299. paddd xmm15, OWORD PTR L_aes_gcm_seven
  4300. pshufb xmm15, xmm1
  4301. paddd xmm0, OWORD PTR L_aes_gcm_eight
  4302. movdqa xmm7, OWORD PTR [rax]
  4303. movdqu [r15], xmm0
  4304. pxor xmm8, xmm7
  4305. pxor xmm9, xmm7
  4306. pxor xmm10, xmm7
  4307. pxor xmm11, xmm7
  4308. pxor xmm12, xmm7
  4309. pxor xmm13, xmm7
  4310. pxor xmm14, xmm7
  4311. pxor xmm15, xmm7
  4312. movdqa xmm7, OWORD PTR [rax+16]
  4313. aesenc xmm8, xmm7
  4314. aesenc xmm9, xmm7
  4315. aesenc xmm10, xmm7
  4316. aesenc xmm11, xmm7
  4317. aesenc xmm12, xmm7
  4318. aesenc xmm13, xmm7
  4319. aesenc xmm14, xmm7
  4320. aesenc xmm15, xmm7
  4321. movdqa xmm7, OWORD PTR [rax+32]
  4322. aesenc xmm8, xmm7
  4323. aesenc xmm9, xmm7
  4324. aesenc xmm10, xmm7
  4325. aesenc xmm11, xmm7
  4326. aesenc xmm12, xmm7
  4327. aesenc xmm13, xmm7
  4328. aesenc xmm14, xmm7
  4329. aesenc xmm15, xmm7
  4330. movdqa xmm7, OWORD PTR [rax+48]
  4331. aesenc xmm8, xmm7
  4332. aesenc xmm9, xmm7
  4333. aesenc xmm10, xmm7
  4334. aesenc xmm11, xmm7
  4335. aesenc xmm12, xmm7
  4336. aesenc xmm13, xmm7
  4337. aesenc xmm14, xmm7
  4338. aesenc xmm15, xmm7
  4339. movdqa xmm7, OWORD PTR [rax+64]
  4340. aesenc xmm8, xmm7
  4341. aesenc xmm9, xmm7
  4342. aesenc xmm10, xmm7
  4343. aesenc xmm11, xmm7
  4344. aesenc xmm12, xmm7
  4345. aesenc xmm13, xmm7
  4346. aesenc xmm14, xmm7
  4347. aesenc xmm15, xmm7
  4348. movdqa xmm7, OWORD PTR [rax+80]
  4349. aesenc xmm8, xmm7
  4350. aesenc xmm9, xmm7
  4351. aesenc xmm10, xmm7
  4352. aesenc xmm11, xmm7
  4353. aesenc xmm12, xmm7
  4354. aesenc xmm13, xmm7
  4355. aesenc xmm14, xmm7
  4356. aesenc xmm15, xmm7
  4357. movdqa xmm7, OWORD PTR [rax+96]
  4358. aesenc xmm8, xmm7
  4359. aesenc xmm9, xmm7
  4360. aesenc xmm10, xmm7
  4361. aesenc xmm11, xmm7
  4362. aesenc xmm12, xmm7
  4363. aesenc xmm13, xmm7
  4364. aesenc xmm14, xmm7
  4365. aesenc xmm15, xmm7
  4366. movdqa xmm7, OWORD PTR [rax+112]
  4367. aesenc xmm8, xmm7
  4368. aesenc xmm9, xmm7
  4369. aesenc xmm10, xmm7
  4370. aesenc xmm11, xmm7
  4371. aesenc xmm12, xmm7
  4372. aesenc xmm13, xmm7
  4373. aesenc xmm14, xmm7
  4374. aesenc xmm15, xmm7
  4375. movdqa xmm7, OWORD PTR [rax+128]
  4376. aesenc xmm8, xmm7
  4377. aesenc xmm9, xmm7
  4378. aesenc xmm10, xmm7
  4379. aesenc xmm11, xmm7
  4380. aesenc xmm12, xmm7
  4381. aesenc xmm13, xmm7
  4382. aesenc xmm14, xmm7
  4383. aesenc xmm15, xmm7
  4384. movdqa xmm7, OWORD PTR [rax+144]
  4385. aesenc xmm8, xmm7
  4386. aesenc xmm9, xmm7
  4387. aesenc xmm10, xmm7
  4388. aesenc xmm11, xmm7
  4389. aesenc xmm12, xmm7
  4390. aesenc xmm13, xmm7
  4391. aesenc xmm14, xmm7
  4392. aesenc xmm15, xmm7
  4393. cmp r8d, 11
  4394. movdqa xmm7, OWORD PTR [rax+160]
  4395. jl L_AES_GCM_encrypt_update_aesni_enc_done
  4396. aesenc xmm8, xmm7
  4397. aesenc xmm9, xmm7
  4398. aesenc xmm10, xmm7
  4399. aesenc xmm11, xmm7
  4400. aesenc xmm12, xmm7
  4401. aesenc xmm13, xmm7
  4402. aesenc xmm14, xmm7
  4403. aesenc xmm15, xmm7
  4404. movdqa xmm7, OWORD PTR [rax+176]
  4405. aesenc xmm8, xmm7
  4406. aesenc xmm9, xmm7
  4407. aesenc xmm10, xmm7
  4408. aesenc xmm11, xmm7
  4409. aesenc xmm12, xmm7
  4410. aesenc xmm13, xmm7
  4411. aesenc xmm14, xmm7
  4412. aesenc xmm15, xmm7
  4413. cmp r8d, 13
  4414. movdqa xmm7, OWORD PTR [rax+192]
  4415. jl L_AES_GCM_encrypt_update_aesni_enc_done
  4416. aesenc xmm8, xmm7
  4417. aesenc xmm9, xmm7
  4418. aesenc xmm10, xmm7
  4419. aesenc xmm11, xmm7
  4420. aesenc xmm12, xmm7
  4421. aesenc xmm13, xmm7
  4422. aesenc xmm14, xmm7
  4423. aesenc xmm15, xmm7
  4424. movdqa xmm7, OWORD PTR [rax+208]
  4425. aesenc xmm8, xmm7
  4426. aesenc xmm9, xmm7
  4427. aesenc xmm10, xmm7
  4428. aesenc xmm11, xmm7
  4429. aesenc xmm12, xmm7
  4430. aesenc xmm13, xmm7
  4431. aesenc xmm14, xmm7
  4432. aesenc xmm15, xmm7
  4433. movdqa xmm7, OWORD PTR [rax+224]
  4434. L_AES_GCM_encrypt_update_aesni_enc_done:
  4435. aesenclast xmm8, xmm7
  4436. aesenclast xmm9, xmm7
  4437. movdqu xmm0, [r11]
  4438. movdqu xmm1, [r11+16]
  4439. pxor xmm8, xmm0
  4440. pxor xmm9, xmm1
  4441. movdqu [r10], xmm8
  4442. movdqu [r10+16], xmm9
  4443. aesenclast xmm10, xmm7
  4444. aesenclast xmm11, xmm7
  4445. movdqu xmm0, [r11+32]
  4446. movdqu xmm1, [r11+48]
  4447. pxor xmm10, xmm0
  4448. pxor xmm11, xmm1
  4449. movdqu [r10+32], xmm10
  4450. movdqu [r10+48], xmm11
  4451. aesenclast xmm12, xmm7
  4452. aesenclast xmm13, xmm7
  4453. movdqu xmm0, [r11+64]
  4454. movdqu xmm1, [r11+80]
  4455. pxor xmm12, xmm0
  4456. pxor xmm13, xmm1
  4457. movdqu [r10+64], xmm12
  4458. movdqu [r10+80], xmm13
  4459. aesenclast xmm14, xmm7
  4460. aesenclast xmm15, xmm7
  4461. movdqu xmm0, [r11+96]
  4462. movdqu xmm1, [r11+112]
  4463. pxor xmm14, xmm0
  4464. pxor xmm15, xmm1
  4465. movdqu [r10+96], xmm14
  4466. movdqu [r10+112], xmm15
  4467. cmp r13d, 128
  4468. mov edi, 128
  4469. jle L_AES_GCM_encrypt_update_aesni_end_128
  4470. ; More 128 bytes of input
  4471. L_AES_GCM_encrypt_update_aesni_ghash_128:
  4472. lea rcx, QWORD PTR [r11+rdi]
  4473. lea rdx, QWORD PTR [r10+rdi]
  4474. movdqu xmm8, [r15]
  4475. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  4476. movdqa xmm0, xmm8
  4477. pshufb xmm8, xmm1
  4478. movdqa xmm9, xmm0
  4479. paddd xmm9, OWORD PTR L_aes_gcm_one
  4480. pshufb xmm9, xmm1
  4481. movdqa xmm10, xmm0
  4482. paddd xmm10, OWORD PTR L_aes_gcm_two
  4483. pshufb xmm10, xmm1
  4484. movdqa xmm11, xmm0
  4485. paddd xmm11, OWORD PTR L_aes_gcm_three
  4486. pshufb xmm11, xmm1
  4487. movdqa xmm12, xmm0
  4488. paddd xmm12, OWORD PTR L_aes_gcm_four
  4489. pshufb xmm12, xmm1
  4490. movdqa xmm13, xmm0
  4491. paddd xmm13, OWORD PTR L_aes_gcm_five
  4492. pshufb xmm13, xmm1
  4493. movdqa xmm14, xmm0
  4494. paddd xmm14, OWORD PTR L_aes_gcm_six
  4495. pshufb xmm14, xmm1
  4496. movdqa xmm15, xmm0
  4497. paddd xmm15, OWORD PTR L_aes_gcm_seven
  4498. pshufb xmm15, xmm1
  4499. paddd xmm0, OWORD PTR L_aes_gcm_eight
  4500. movdqa xmm7, OWORD PTR [rax]
  4501. movdqu [r15], xmm0
  4502. pxor xmm8, xmm7
  4503. pxor xmm9, xmm7
  4504. pxor xmm10, xmm7
  4505. pxor xmm11, xmm7
  4506. pxor xmm12, xmm7
  4507. pxor xmm13, xmm7
  4508. pxor xmm14, xmm7
  4509. pxor xmm15, xmm7
  4510. movdqu xmm7, [rsp+112]
  4511. movdqu xmm0, [rdx+-128]
  4512. aesenc xmm8, [rax+16]
  4513. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4514. pxor xmm0, xmm2
  4515. pshufd xmm1, xmm7, 78
  4516. pshufd xmm5, xmm0, 78
  4517. pxor xmm1, xmm7
  4518. pxor xmm5, xmm0
  4519. movdqa xmm3, xmm0
  4520. pclmulqdq xmm3, xmm7, 17
  4521. aesenc xmm9, [rax+16]
  4522. aesenc xmm10, [rax+16]
  4523. movdqa xmm2, xmm0
  4524. pclmulqdq xmm2, xmm7, 0
  4525. aesenc xmm11, [rax+16]
  4526. aesenc xmm12, [rax+16]
  4527. pclmulqdq xmm1, xmm5, 0
  4528. aesenc xmm13, [rax+16]
  4529. aesenc xmm14, [rax+16]
  4530. aesenc xmm15, [rax+16]
  4531. pxor xmm1, xmm2
  4532. pxor xmm1, xmm3
  4533. movdqu xmm7, [rsp+96]
  4534. movdqu xmm0, [rdx+-112]
  4535. pshufd xmm4, xmm7, 78
  4536. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4537. aesenc xmm8, [rax+32]
  4538. pxor xmm4, xmm7
  4539. pshufd xmm5, xmm0, 78
  4540. pxor xmm5, xmm0
  4541. movdqa xmm6, xmm0
  4542. pclmulqdq xmm6, xmm7, 17
  4543. aesenc xmm9, [rax+32]
  4544. aesenc xmm10, [rax+32]
  4545. pclmulqdq xmm7, xmm0, 0
  4546. aesenc xmm11, [rax+32]
  4547. aesenc xmm12, [rax+32]
  4548. pclmulqdq xmm4, xmm5, 0
  4549. aesenc xmm13, [rax+32]
  4550. aesenc xmm14, [rax+32]
  4551. aesenc xmm15, [rax+32]
  4552. pxor xmm1, xmm7
  4553. pxor xmm2, xmm7
  4554. pxor xmm1, xmm6
  4555. pxor xmm3, xmm6
  4556. pxor xmm1, xmm4
  4557. movdqu xmm7, [rsp+80]
  4558. movdqu xmm0, [rdx+-96]
  4559. pshufd xmm4, xmm7, 78
  4560. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4561. aesenc xmm8, [rax+48]
  4562. pxor xmm4, xmm7
  4563. pshufd xmm5, xmm0, 78
  4564. pxor xmm5, xmm0
  4565. movdqa xmm6, xmm0
  4566. pclmulqdq xmm6, xmm7, 17
  4567. aesenc xmm9, [rax+48]
  4568. aesenc xmm10, [rax+48]
  4569. pclmulqdq xmm7, xmm0, 0
  4570. aesenc xmm11, [rax+48]
  4571. aesenc xmm12, [rax+48]
  4572. pclmulqdq xmm4, xmm5, 0
  4573. aesenc xmm13, [rax+48]
  4574. aesenc xmm14, [rax+48]
  4575. aesenc xmm15, [rax+48]
  4576. pxor xmm1, xmm7
  4577. pxor xmm2, xmm7
  4578. pxor xmm1, xmm6
  4579. pxor xmm3, xmm6
  4580. pxor xmm1, xmm4
  4581. movdqu xmm7, [rsp+64]
  4582. movdqu xmm0, [rdx+-80]
  4583. pshufd xmm4, xmm7, 78
  4584. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4585. aesenc xmm8, [rax+64]
  4586. pxor xmm4, xmm7
  4587. pshufd xmm5, xmm0, 78
  4588. pxor xmm5, xmm0
  4589. movdqa xmm6, xmm0
  4590. pclmulqdq xmm6, xmm7, 17
  4591. aesenc xmm9, [rax+64]
  4592. aesenc xmm10, [rax+64]
  4593. pclmulqdq xmm7, xmm0, 0
  4594. aesenc xmm11, [rax+64]
  4595. aesenc xmm12, [rax+64]
  4596. pclmulqdq xmm4, xmm5, 0
  4597. aesenc xmm13, [rax+64]
  4598. aesenc xmm14, [rax+64]
  4599. aesenc xmm15, [rax+64]
  4600. pxor xmm1, xmm7
  4601. pxor xmm2, xmm7
  4602. pxor xmm1, xmm6
  4603. pxor xmm3, xmm6
  4604. pxor xmm1, xmm4
  4605. movdqu xmm7, [rsp+48]
  4606. movdqu xmm0, [rdx+-64]
  4607. pshufd xmm4, xmm7, 78
  4608. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4609. aesenc xmm8, [rax+80]
  4610. pxor xmm4, xmm7
  4611. pshufd xmm5, xmm0, 78
  4612. pxor xmm5, xmm0
  4613. movdqa xmm6, xmm0
  4614. pclmulqdq xmm6, xmm7, 17
  4615. aesenc xmm9, [rax+80]
  4616. aesenc xmm10, [rax+80]
  4617. pclmulqdq xmm7, xmm0, 0
  4618. aesenc xmm11, [rax+80]
  4619. aesenc xmm12, [rax+80]
  4620. pclmulqdq xmm4, xmm5, 0
  4621. aesenc xmm13, [rax+80]
  4622. aesenc xmm14, [rax+80]
  4623. aesenc xmm15, [rax+80]
  4624. pxor xmm1, xmm7
  4625. pxor xmm2, xmm7
  4626. pxor xmm1, xmm6
  4627. pxor xmm3, xmm6
  4628. pxor xmm1, xmm4
  4629. movdqu xmm7, [rsp+32]
  4630. movdqu xmm0, [rdx+-48]
  4631. pshufd xmm4, xmm7, 78
  4632. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4633. aesenc xmm8, [rax+96]
  4634. pxor xmm4, xmm7
  4635. pshufd xmm5, xmm0, 78
  4636. pxor xmm5, xmm0
  4637. movdqa xmm6, xmm0
  4638. pclmulqdq xmm6, xmm7, 17
  4639. aesenc xmm9, [rax+96]
  4640. aesenc xmm10, [rax+96]
  4641. pclmulqdq xmm7, xmm0, 0
  4642. aesenc xmm11, [rax+96]
  4643. aesenc xmm12, [rax+96]
  4644. pclmulqdq xmm4, xmm5, 0
  4645. aesenc xmm13, [rax+96]
  4646. aesenc xmm14, [rax+96]
  4647. aesenc xmm15, [rax+96]
  4648. pxor xmm1, xmm7
  4649. pxor xmm2, xmm7
  4650. pxor xmm1, xmm6
  4651. pxor xmm3, xmm6
  4652. pxor xmm1, xmm4
  4653. movdqu xmm7, [rsp+16]
  4654. movdqu xmm0, [rdx+-32]
  4655. pshufd xmm4, xmm7, 78
  4656. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4657. aesenc xmm8, [rax+112]
  4658. pxor xmm4, xmm7
  4659. pshufd xmm5, xmm0, 78
  4660. pxor xmm5, xmm0
  4661. movdqa xmm6, xmm0
  4662. pclmulqdq xmm6, xmm7, 17
  4663. aesenc xmm9, [rax+112]
  4664. aesenc xmm10, [rax+112]
  4665. pclmulqdq xmm7, xmm0, 0
  4666. aesenc xmm11, [rax+112]
  4667. aesenc xmm12, [rax+112]
  4668. pclmulqdq xmm4, xmm5, 0
  4669. aesenc xmm13, [rax+112]
  4670. aesenc xmm14, [rax+112]
  4671. aesenc xmm15, [rax+112]
  4672. pxor xmm1, xmm7
  4673. pxor xmm2, xmm7
  4674. pxor xmm1, xmm6
  4675. pxor xmm3, xmm6
  4676. pxor xmm1, xmm4
  4677. movdqu xmm7, [rsp]
  4678. movdqu xmm0, [rdx+-16]
  4679. pshufd xmm4, xmm7, 78
  4680. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4681. aesenc xmm8, [rax+128]
  4682. pxor xmm4, xmm7
  4683. pshufd xmm5, xmm0, 78
  4684. pxor xmm5, xmm0
  4685. movdqa xmm6, xmm0
  4686. pclmulqdq xmm6, xmm7, 17
  4687. aesenc xmm9, [rax+128]
  4688. aesenc xmm10, [rax+128]
  4689. pclmulqdq xmm7, xmm0, 0
  4690. aesenc xmm11, [rax+128]
  4691. aesenc xmm12, [rax+128]
  4692. pclmulqdq xmm4, xmm5, 0
  4693. aesenc xmm13, [rax+128]
  4694. aesenc xmm14, [rax+128]
  4695. aesenc xmm15, [rax+128]
  4696. pxor xmm1, xmm7
  4697. pxor xmm2, xmm7
  4698. pxor xmm1, xmm6
  4699. pxor xmm3, xmm6
  4700. pxor xmm1, xmm4
  4701. movdqa xmm5, xmm1
  4702. psrldq xmm1, 8
  4703. pslldq xmm5, 8
  4704. aesenc xmm8, [rax+144]
  4705. pxor xmm2, xmm5
  4706. pxor xmm3, xmm1
  4707. movdqa xmm7, xmm2
  4708. movdqa xmm4, xmm2
  4709. movdqa xmm5, xmm2
  4710. aesenc xmm9, [rax+144]
  4711. pslld xmm7, 31
  4712. pslld xmm4, 30
  4713. pslld xmm5, 25
  4714. aesenc xmm10, [rax+144]
  4715. pxor xmm7, xmm4
  4716. pxor xmm7, xmm5
  4717. aesenc xmm11, [rax+144]
  4718. movdqa xmm4, xmm7
  4719. pslldq xmm7, 12
  4720. psrldq xmm4, 4
  4721. aesenc xmm12, [rax+144]
  4722. pxor xmm2, xmm7
  4723. movdqa xmm5, xmm2
  4724. movdqa xmm1, xmm2
  4725. movdqa xmm0, xmm2
  4726. aesenc xmm13, [rax+144]
  4727. psrld xmm5, 1
  4728. psrld xmm1, 2
  4729. psrld xmm0, 7
  4730. aesenc xmm14, [rax+144]
  4731. pxor xmm5, xmm1
  4732. pxor xmm5, xmm0
  4733. aesenc xmm15, [rax+144]
  4734. pxor xmm5, xmm4
  4735. pxor xmm2, xmm5
  4736. pxor xmm2, xmm3
  4737. cmp r8d, 11
  4738. movdqa xmm7, OWORD PTR [rax+160]
  4739. jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done
  4740. aesenc xmm8, xmm7
  4741. aesenc xmm9, xmm7
  4742. aesenc xmm10, xmm7
  4743. aesenc xmm11, xmm7
  4744. aesenc xmm12, xmm7
  4745. aesenc xmm13, xmm7
  4746. aesenc xmm14, xmm7
  4747. aesenc xmm15, xmm7
  4748. movdqa xmm7, OWORD PTR [rax+176]
  4749. aesenc xmm8, xmm7
  4750. aesenc xmm9, xmm7
  4751. aesenc xmm10, xmm7
  4752. aesenc xmm11, xmm7
  4753. aesenc xmm12, xmm7
  4754. aesenc xmm13, xmm7
  4755. aesenc xmm14, xmm7
  4756. aesenc xmm15, xmm7
  4757. cmp r8d, 13
  4758. movdqa xmm7, OWORD PTR [rax+192]
  4759. jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done
  4760. aesenc xmm8, xmm7
  4761. aesenc xmm9, xmm7
  4762. aesenc xmm10, xmm7
  4763. aesenc xmm11, xmm7
  4764. aesenc xmm12, xmm7
  4765. aesenc xmm13, xmm7
  4766. aesenc xmm14, xmm7
  4767. aesenc xmm15, xmm7
  4768. movdqa xmm7, OWORD PTR [rax+208]
  4769. aesenc xmm8, xmm7
  4770. aesenc xmm9, xmm7
  4771. aesenc xmm10, xmm7
  4772. aesenc xmm11, xmm7
  4773. aesenc xmm12, xmm7
  4774. aesenc xmm13, xmm7
  4775. aesenc xmm14, xmm7
  4776. aesenc xmm15, xmm7
  4777. movdqa xmm7, OWORD PTR [rax+224]
  4778. L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done:
  4779. aesenclast xmm8, xmm7
  4780. aesenclast xmm9, xmm7
  4781. movdqu xmm0, [rcx]
  4782. movdqu xmm1, [rcx+16]
  4783. pxor xmm8, xmm0
  4784. pxor xmm9, xmm1
  4785. movdqu [rdx], xmm8
  4786. movdqu [rdx+16], xmm9
  4787. aesenclast xmm10, xmm7
  4788. aesenclast xmm11, xmm7
  4789. movdqu xmm0, [rcx+32]
  4790. movdqu xmm1, [rcx+48]
  4791. pxor xmm10, xmm0
  4792. pxor xmm11, xmm1
  4793. movdqu [rdx+32], xmm10
  4794. movdqu [rdx+48], xmm11
  4795. aesenclast xmm12, xmm7
  4796. aesenclast xmm13, xmm7
  4797. movdqu xmm0, [rcx+64]
  4798. movdqu xmm1, [rcx+80]
  4799. pxor xmm12, xmm0
  4800. pxor xmm13, xmm1
  4801. movdqu [rdx+64], xmm12
  4802. movdqu [rdx+80], xmm13
  4803. aesenclast xmm14, xmm7
  4804. aesenclast xmm15, xmm7
  4805. movdqu xmm0, [rcx+96]
  4806. movdqu xmm1, [rcx+112]
  4807. pxor xmm14, xmm0
  4808. pxor xmm15, xmm1
  4809. movdqu [rdx+96], xmm14
  4810. movdqu [rdx+112], xmm15
  4811. add edi, 128
  4812. cmp edi, r13d
  4813. jl L_AES_GCM_encrypt_update_aesni_ghash_128
  4814. L_AES_GCM_encrypt_update_aesni_end_128:
  4815. movdqa xmm4, OWORD PTR L_aes_gcm_bswap_mask
  4816. pshufb xmm8, xmm4
  4817. pshufb xmm9, xmm4
  4818. pshufb xmm10, xmm4
  4819. pshufb xmm11, xmm4
  4820. pxor xmm8, xmm2
  4821. pshufb xmm12, xmm4
  4822. pshufb xmm13, xmm4
  4823. pshufb xmm14, xmm4
  4824. pshufb xmm15, xmm4
  4825. movdqu xmm7, [rsp+112]
  4826. pshufd xmm1, xmm8, 78
  4827. pshufd xmm2, xmm7, 78
  4828. movdqa xmm3, xmm7
  4829. movdqa xmm0, xmm7
  4830. pclmulqdq xmm3, xmm8, 17
  4831. pclmulqdq xmm0, xmm8, 0
  4832. pxor xmm1, xmm8
  4833. pxor xmm2, xmm7
  4834. pclmulqdq xmm1, xmm2, 0
  4835. pxor xmm1, xmm0
  4836. pxor xmm1, xmm3
  4837. movdqa xmm2, xmm1
  4838. movdqa xmm4, xmm0
  4839. movdqa xmm6, xmm3
  4840. pslldq xmm2, 8
  4841. psrldq xmm1, 8
  4842. pxor xmm4, xmm2
  4843. pxor xmm6, xmm1
  4844. movdqu xmm7, [rsp+96]
  4845. pshufd xmm1, xmm9, 78
  4846. pshufd xmm2, xmm7, 78
  4847. movdqa xmm3, xmm7
  4848. movdqa xmm0, xmm7
  4849. pclmulqdq xmm3, xmm9, 17
  4850. pclmulqdq xmm0, xmm9, 0
  4851. pxor xmm1, xmm9
  4852. pxor xmm2, xmm7
  4853. pclmulqdq xmm1, xmm2, 0
  4854. pxor xmm1, xmm0
  4855. pxor xmm1, xmm3
  4856. movdqa xmm2, xmm1
  4857. pxor xmm4, xmm0
  4858. pxor xmm6, xmm3
  4859. pslldq xmm2, 8
  4860. psrldq xmm1, 8
  4861. pxor xmm4, xmm2
  4862. pxor xmm6, xmm1
  4863. movdqu xmm7, [rsp+80]
  4864. pshufd xmm1, xmm10, 78
  4865. pshufd xmm2, xmm7, 78
  4866. movdqa xmm3, xmm7
  4867. movdqa xmm0, xmm7
  4868. pclmulqdq xmm3, xmm10, 17
  4869. pclmulqdq xmm0, xmm10, 0
  4870. pxor xmm1, xmm10
  4871. pxor xmm2, xmm7
  4872. pclmulqdq xmm1, xmm2, 0
  4873. pxor xmm1, xmm0
  4874. pxor xmm1, xmm3
  4875. movdqa xmm2, xmm1
  4876. pxor xmm4, xmm0
  4877. pxor xmm6, xmm3
  4878. pslldq xmm2, 8
  4879. psrldq xmm1, 8
  4880. pxor xmm4, xmm2
  4881. pxor xmm6, xmm1
  4882. movdqu xmm7, [rsp+64]
  4883. pshufd xmm1, xmm11, 78
  4884. pshufd xmm2, xmm7, 78
  4885. movdqa xmm3, xmm7
  4886. movdqa xmm0, xmm7
  4887. pclmulqdq xmm3, xmm11, 17
  4888. pclmulqdq xmm0, xmm11, 0
  4889. pxor xmm1, xmm11
  4890. pxor xmm2, xmm7
  4891. pclmulqdq xmm1, xmm2, 0
  4892. pxor xmm1, xmm0
  4893. pxor xmm1, xmm3
  4894. movdqa xmm2, xmm1
  4895. pxor xmm4, xmm0
  4896. pxor xmm6, xmm3
  4897. pslldq xmm2, 8
  4898. psrldq xmm1, 8
  4899. pxor xmm4, xmm2
  4900. pxor xmm6, xmm1
  4901. movdqu xmm7, [rsp+48]
  4902. pshufd xmm1, xmm12, 78
  4903. pshufd xmm2, xmm7, 78
  4904. movdqa xmm3, xmm7
  4905. movdqa xmm0, xmm7
  4906. pclmulqdq xmm3, xmm12, 17
  4907. pclmulqdq xmm0, xmm12, 0
  4908. pxor xmm1, xmm12
  4909. pxor xmm2, xmm7
  4910. pclmulqdq xmm1, xmm2, 0
  4911. pxor xmm1, xmm0
  4912. pxor xmm1, xmm3
  4913. movdqa xmm2, xmm1
  4914. pxor xmm4, xmm0
  4915. pxor xmm6, xmm3
  4916. pslldq xmm2, 8
  4917. psrldq xmm1, 8
  4918. pxor xmm4, xmm2
  4919. pxor xmm6, xmm1
  4920. movdqu xmm7, [rsp+32]
  4921. pshufd xmm1, xmm13, 78
  4922. pshufd xmm2, xmm7, 78
  4923. movdqa xmm3, xmm7
  4924. movdqa xmm0, xmm7
  4925. pclmulqdq xmm3, xmm13, 17
  4926. pclmulqdq xmm0, xmm13, 0
  4927. pxor xmm1, xmm13
  4928. pxor xmm2, xmm7
  4929. pclmulqdq xmm1, xmm2, 0
  4930. pxor xmm1, xmm0
  4931. pxor xmm1, xmm3
  4932. movdqa xmm2, xmm1
  4933. pxor xmm4, xmm0
  4934. pxor xmm6, xmm3
  4935. pslldq xmm2, 8
  4936. psrldq xmm1, 8
  4937. pxor xmm4, xmm2
  4938. pxor xmm6, xmm1
  4939. movdqu xmm7, [rsp+16]
  4940. pshufd xmm1, xmm14, 78
  4941. pshufd xmm2, xmm7, 78
  4942. movdqa xmm3, xmm7
  4943. movdqa xmm0, xmm7
  4944. pclmulqdq xmm3, xmm14, 17
  4945. pclmulqdq xmm0, xmm14, 0
  4946. pxor xmm1, xmm14
  4947. pxor xmm2, xmm7
  4948. pclmulqdq xmm1, xmm2, 0
  4949. pxor xmm1, xmm0
  4950. pxor xmm1, xmm3
  4951. movdqa xmm2, xmm1
  4952. pxor xmm4, xmm0
  4953. pxor xmm6, xmm3
  4954. pslldq xmm2, 8
  4955. psrldq xmm1, 8
  4956. pxor xmm4, xmm2
  4957. pxor xmm6, xmm1
  4958. movdqu xmm7, [rsp]
  4959. pshufd xmm1, xmm15, 78
  4960. pshufd xmm2, xmm7, 78
  4961. movdqa xmm3, xmm7
  4962. movdqa xmm0, xmm7
  4963. pclmulqdq xmm3, xmm15, 17
  4964. pclmulqdq xmm0, xmm15, 0
  4965. pxor xmm1, xmm15
  4966. pxor xmm2, xmm7
  4967. pclmulqdq xmm1, xmm2, 0
  4968. pxor xmm1, xmm0
  4969. pxor xmm1, xmm3
  4970. movdqa xmm2, xmm1
  4971. pxor xmm4, xmm0
  4972. pxor xmm6, xmm3
  4973. pslldq xmm2, 8
  4974. psrldq xmm1, 8
  4975. pxor xmm4, xmm2
  4976. pxor xmm6, xmm1
  4977. movdqa xmm0, xmm4
  4978. movdqa xmm1, xmm4
  4979. movdqa xmm2, xmm4
  4980. pslld xmm0, 31
  4981. pslld xmm1, 30
  4982. pslld xmm2, 25
  4983. pxor xmm0, xmm1
  4984. pxor xmm0, xmm2
  4985. movdqa xmm1, xmm0
  4986. psrldq xmm1, 4
  4987. pslldq xmm0, 12
  4988. pxor xmm4, xmm0
  4989. movdqa xmm2, xmm4
  4990. movdqa xmm3, xmm4
  4991. movdqa xmm0, xmm4
  4992. psrld xmm2, 1
  4993. psrld xmm3, 2
  4994. psrld xmm0, 7
  4995. pxor xmm2, xmm3
  4996. pxor xmm2, xmm0
  4997. pxor xmm2, xmm1
  4998. pxor xmm2, xmm4
  4999. pxor xmm6, xmm2
  5000. movdqu xmm5, [rsp]
  5001. L_AES_GCM_encrypt_update_aesni_done_128:
  5002. mov edx, r9d
  5003. cmp edi, edx
  5004. jge L_AES_GCM_encrypt_update_aesni_done_enc
  5005. mov r13d, r9d
  5006. and r13d, 4294967280
  5007. cmp edi, r13d
  5008. jge L_AES_GCM_encrypt_update_aesni_last_block_done
  5009. lea rcx, QWORD PTR [r11+rdi]
  5010. lea rdx, QWORD PTR [r10+rdi]
  5011. movdqu xmm8, [r15]
  5012. movdqa xmm9, xmm8
  5013. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  5014. paddd xmm9, OWORD PTR L_aes_gcm_one
  5015. pxor xmm8, [rax]
  5016. movdqu [r15], xmm9
  5017. aesenc xmm8, [rax+16]
  5018. aesenc xmm8, [rax+32]
  5019. aesenc xmm8, [rax+48]
  5020. aesenc xmm8, [rax+64]
  5021. aesenc xmm8, [rax+80]
  5022. aesenc xmm8, [rax+96]
  5023. aesenc xmm8, [rax+112]
  5024. aesenc xmm8, [rax+128]
  5025. aesenc xmm8, [rax+144]
  5026. cmp r8d, 11
  5027. movdqa xmm9, OWORD PTR [rax+160]
  5028. jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last
  5029. aesenc xmm8, xmm9
  5030. aesenc xmm8, [rax+176]
  5031. cmp r8d, 13
  5032. movdqa xmm9, OWORD PTR [rax+192]
  5033. jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last
  5034. aesenc xmm8, xmm9
  5035. aesenc xmm8, [rax+208]
  5036. movdqa xmm9, OWORD PTR [rax+224]
  5037. L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last:
  5038. aesenclast xmm8, xmm9
  5039. movdqu xmm9, [rcx]
  5040. pxor xmm8, xmm9
  5041. movdqu [rdx], xmm8
  5042. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  5043. pxor xmm6, xmm8
  5044. add edi, 16
  5045. cmp edi, r13d
  5046. jge L_AES_GCM_encrypt_update_aesni_last_block_ghash
  5047. L_AES_GCM_encrypt_update_aesni_last_block_start:
  5048. lea rcx, QWORD PTR [r11+rdi]
  5049. lea rdx, QWORD PTR [r10+rdi]
  5050. movdqu xmm8, [r15]
  5051. movdqa xmm9, xmm8
  5052. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  5053. paddd xmm9, OWORD PTR L_aes_gcm_one
  5054. pxor xmm8, [rax]
  5055. movdqu [r15], xmm9
  5056. movdqa xmm10, xmm6
  5057. pclmulqdq xmm10, xmm5, 16
  5058. aesenc xmm8, [rax+16]
  5059. aesenc xmm8, [rax+32]
  5060. movdqa xmm11, xmm6
  5061. pclmulqdq xmm11, xmm5, 1
  5062. aesenc xmm8, [rax+48]
  5063. aesenc xmm8, [rax+64]
  5064. movdqa xmm12, xmm6
  5065. pclmulqdq xmm12, xmm5, 0
  5066. aesenc xmm8, [rax+80]
  5067. movdqa xmm1, xmm6
  5068. pclmulqdq xmm1, xmm5, 17
  5069. aesenc xmm8, [rax+96]
  5070. pxor xmm10, xmm11
  5071. movdqa xmm2, xmm10
  5072. psrldq xmm10, 8
  5073. pslldq xmm2, 8
  5074. aesenc xmm8, [rax+112]
  5075. movdqa xmm3, xmm1
  5076. pxor xmm2, xmm12
  5077. pxor xmm3, xmm10
  5078. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  5079. movdqa xmm11, xmm2
  5080. pclmulqdq xmm11, xmm0, 16
  5081. aesenc xmm8, [rax+128]
  5082. pshufd xmm10, xmm2, 78
  5083. pxor xmm10, xmm11
  5084. movdqa xmm11, xmm10
  5085. pclmulqdq xmm11, xmm0, 16
  5086. aesenc xmm8, [rax+144]
  5087. pshufd xmm6, xmm10, 78
  5088. pxor xmm6, xmm11
  5089. pxor xmm6, xmm3
  5090. cmp r8d, 11
  5091. movdqa xmm9, OWORD PTR [rax+160]
  5092. jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last
  5093. aesenc xmm8, xmm9
  5094. aesenc xmm8, [rax+176]
  5095. cmp r8d, 13
  5096. movdqa xmm9, OWORD PTR [rax+192]
  5097. jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last
  5098. aesenc xmm8, xmm9
  5099. aesenc xmm8, [rax+208]
  5100. movdqa xmm9, OWORD PTR [rax+224]
  5101. L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last:
  5102. aesenclast xmm8, xmm9
  5103. movdqu xmm9, [rcx]
  5104. pxor xmm8, xmm9
  5105. movdqu [rdx], xmm8
  5106. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  5107. pxor xmm6, xmm8
  5108. add edi, 16
  5109. cmp edi, r13d
  5110. jl L_AES_GCM_encrypt_update_aesni_last_block_start
  5111. L_AES_GCM_encrypt_update_aesni_last_block_ghash:
  5112. pshufd xmm9, xmm5, 78
  5113. pshufd xmm10, xmm6, 78
  5114. movdqa xmm11, xmm6
  5115. movdqa xmm8, xmm6
  5116. pclmulqdq xmm11, xmm5, 17
  5117. pclmulqdq xmm8, xmm5, 0
  5118. pxor xmm9, xmm5
  5119. pxor xmm10, xmm6
  5120. pclmulqdq xmm9, xmm10, 0
  5121. pxor xmm9, xmm8
  5122. pxor xmm9, xmm11
  5123. movdqa xmm10, xmm9
  5124. movdqa xmm6, xmm11
  5125. pslldq xmm10, 8
  5126. psrldq xmm9, 8
  5127. pxor xmm8, xmm10
  5128. pxor xmm6, xmm9
  5129. movdqa xmm12, xmm8
  5130. movdqa xmm13, xmm8
  5131. movdqa xmm14, xmm8
  5132. pslld xmm12, 31
  5133. pslld xmm13, 30
  5134. pslld xmm14, 25
  5135. pxor xmm12, xmm13
  5136. pxor xmm12, xmm14
  5137. movdqa xmm13, xmm12
  5138. psrldq xmm13, 4
  5139. pslldq xmm12, 12
  5140. pxor xmm8, xmm12
  5141. movdqa xmm14, xmm8
  5142. movdqa xmm10, xmm8
  5143. movdqa xmm9, xmm8
  5144. psrld xmm14, 1
  5145. psrld xmm10, 2
  5146. psrld xmm9, 7
  5147. pxor xmm14, xmm10
  5148. pxor xmm14, xmm9
  5149. pxor xmm14, xmm13
  5150. pxor xmm14, xmm8
  5151. pxor xmm6, xmm14
  5152. L_AES_GCM_encrypt_update_aesni_last_block_done:
  5153. L_AES_GCM_encrypt_update_aesni_done_enc:
  5154. movdqa OWORD PTR [r12], xmm6
  5155. movdqu xmm6, [rsp+160]
  5156. movdqu xmm7, [rsp+176]
  5157. movdqu xmm8, [rsp+192]
  5158. movdqu xmm9, [rsp+208]
  5159. movdqu xmm10, [rsp+224]
  5160. movdqu xmm11, [rsp+240]
  5161. movdqu xmm12, [rsp+256]
  5162. movdqu xmm13, [rsp+272]
  5163. movdqu xmm14, [rsp+288]
  5164. movdqu xmm15, [rsp+304]
  5165. add rsp, 320
  5166. pop rdi
  5167. pop r15
  5168. pop r14
  5169. pop r12
  5170. pop r13
  5171. ret
  5172. AES_GCM_encrypt_update_aesni ENDP
  5173. _text ENDS
  5174. _text SEGMENT READONLY PARA
  ; ----------------------------------------------------------------------
  ; AES_GCM_encrypt_final_aesni
  ;
  ; Final step of a GHASH-style authentication-tag computation: mixes a
  ; 128-bit block built from two lengths into the running hash state,
  ; multiplies by the hash key, XORs in a caller-supplied 16-byte block and
  ; stores the leading bytes of the result to the output buffer.
  ;
  ; Register/stack usage matches the Microsoft x64 calling convention.
  ; Parameter names are not visible in this file; roles below are what the
  ; code does with each argument:
  ;   rcx        arg1: pointer to 16-byte hash state; read with movdqa, so
  ;              it must be 16-byte aligned.
  ;   rdx        arg2: pointer to the output (tag) buffer.
  ;   r8d        arg3: number of tag bytes to store.
  ;   r9d        arg4: 32-bit count, multiplied by 8 and placed in the low
  ;              qword of the length block (presumably the message length
  ;              in bytes - TODO confirm against the C prototype).
  ;   [stack]    arg5: 32-bit count, multiplied by 8 and placed in the high
  ;              qword of the length block (presumably the AAD length in
  ;              bytes - TODO confirm).
  ;   [stack]    arg6: pointer to a 16-byte block used as the multiplier
  ;              (presumably the hash key H); movdqa => 16-byte aligned.
  ;   [stack]    arg7: pointer to a 16-byte block XORed into the final
  ;              result (presumably the encrypted initial counter block -
  ;              TODO confirm); movdqa => 16-byte aligned.
  ;
  ; Returns nothing in rax (rax is used as a scratch pointer).
  ; Preserves r12, r13, r14 and xmm6-xmm13 (saved/restored below).
  ; Overwrites rax, rcx, rdx, r9, r10, r11, xmm0, xmm4, xmm5 and flags.
  ;
  ; Stack frame after "sub rsp, 144":
  ;   [rsp+0   .. rsp+15 ]  scratch copy of the tag for the byte-wise store
  ;   [rsp+16  .. rsp+143]  save area for xmm6..xmm13 (8 x 16 bytes)
  ;
  ; NOTE(review): the partial-tag copy loop is bottom-tested with "jne", so
  ; arg3 == 0 does not exit after zero bytes (it keeps copying until the
  ; 32-bit counter wraps), and arg3 > 16 reads past the 16-byte scratch
  ; slot into the XMM save area. Nothing here validates arg3; callers
  ; presumably guarantee 1..16 - confirm.
  ; ----------------------------------------------------------------------
  5175. AES_GCM_encrypt_final_aesni PROC
  ; Save the callee-saved general-purpose registers this routine uses.
  5176. push r13
  5177. push r12
  5178. push r14
  ; Move register arguments out of the way: rcx and rdx are reused as
  ; scratch further down.
  ;   rax  = arg1 (hash state pointer)
  ;   r10d = arg4 (first length)
  ;   r9   = arg2 (output pointer)
  5179. mov rax, rcx
  5180. mov r10d, r9d
  5181. mov r9, rdx
  ; Stack arguments. After the three pushes: 24 bytes of saved registers,
  ; 8 bytes of return address and 32 bytes of shadow space precede arg5,
  ; hence offset 64.
  ;   r11d = arg5 (second length), r12 = arg6, r14 = arg7
  5182. mov r11d, DWORD PTR [rsp+64]
  5183. mov r12, QWORD PTR [rsp+72]
  5184. mov r14, QWORD PTR [rsp+80]
  ; Allocate the frame and save xmm6-xmm13, which are callee-saved under
  ; the Microsoft x64 convention and are all written below.
  5185. sub rsp, 144
  5186. movdqu [rsp+16], xmm6
  5187. movdqu [rsp+32], xmm7
  5188. movdqu [rsp+48], xmm8
  5189. movdqu [rsp+64], xmm9
  5190. movdqu [rsp+80], xmm10
  5191. movdqu [rsp+96], xmm11
  5192. movdqu [rsp+112], xmm12
  5193. movdqu [rsp+128], xmm13
  ; Load the three 16-byte inputs:
  ;   xmm4 = *arg1 (hash state), xmm5 = *arg6 (multiplier), xmm6 = *arg7
  5194. movdqa xmm4, OWORD PTR [rax]
  5195. movdqa xmm5, OWORD PTR [r12]
  5196. movdqa xmm6, OWORD PTR [r14]
  ; Shift the multiplier left by one bit as a single 128-bit value:
  ; xmm7 holds both qwords shifted left by 1, xmm8 carries bit 63 of the
  ; low qword into bit 0 of the high qword (the carry out of bit 127 is
  ; dropped by the byte shift).
  5197. movdqa xmm8, xmm5
  5198. movdqa xmm7, xmm5
  5199. psrlq xmm8, 63
  5200. psllq xmm7, 1
  5201. pslldq xmm8, 8
  5202. por xmm7, xmm8
  ; Broadcast the top dword and arithmetic-shift it by 31 to get an
  ; all-ones / all-zeros mask from the original bit 127, then use it to
  ; conditionally XOR in the constant L_aes_gcm_mod2_128 (defined
  ; elsewhere in the file).
  ;   xmm5 = (multiplier << 1) ^ (bit127 ? L_aes_gcm_mod2_128 : 0)
  5203. pshufd xmm5, xmm5, 255
  5204. psrad xmm5, 31
  5205. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  5206. pxor xmm5, xmm7
  ; Build the length block: both 32-bit counts are zero-extended to 64
  ; bits by the 32-bit moves, multiplied by 8 (shl 3), and packed as
  ; xmm0 = [ arg5*8 : arg4*8 ] (high qword : low qword).
  5207. mov edx, r10d
  5208. mov ecx, r11d
  5209. shl rdx, 3
  5210. shl rcx, 3
  5211. pinsrq xmm0, rdx, 0
  5212. pinsrq xmm0, rcx, 1
  ; Fold the length block into the hash state.
  5213. pxor xmm4, xmm0
  ; 128x128-bit carry-less multiply of xmm4 (state) by xmm5 (multiplier)
  ; using three PCLMULQDQs (Karatsuba):
  ;   xmm10 = hi(xmm4) * hi(xmm5)                     (imm 17 = 0x11)
  ;   xmm7  = lo(xmm4) * lo(xmm5)                     (imm 0)
  ;   xmm8  = (hi^lo)(xmm5) * (hi^lo)(xmm4)           (pshufd 78 swaps qwords)
  5214. pshufd xmm8, xmm5, 78
  5215. pshufd xmm9, xmm4, 78
  5216. movdqa xmm10, xmm4
  5217. movdqa xmm7, xmm4
  5218. pclmulqdq xmm10, xmm5, 17
  5219. pclmulqdq xmm7, xmm5, 0
  5220. pxor xmm8, xmm5
  5221. pxor xmm9, xmm4
  5222. pclmulqdq xmm8, xmm9, 0
  ; Middle term = xmm8 ^ low product ^ high product; split it across the
  ; two halves so that xmm7 = low 128 bits and xmm4 = high 128 bits of the
  ; 256-bit product.
  5223. pxor xmm8, xmm7
  5224. pxor xmm8, xmm10
  5225. movdqa xmm9, xmm8
  5226. movdqa xmm4, xmm10
  5227. pslldq xmm9, 8
  5228. psrldq xmm8, 8
  5229. pxor xmm7, xmm9
  5230. pxor xmm4, xmm8
  ; Reduce the 256-bit product to 128 bits with shift-and-XOR steps
  ; (presumably reduction modulo the GHASH polynomial - the shift counts
  ; 31/30/25 and 1/2/7 are the only evidence visible here).
  ; Step 1: per-dword left shifts of the low half by 31, 30 and 25,
  ; XORed together into xmm11.
  5231. movdqa xmm11, xmm7
  5232. movdqa xmm12, xmm7
  5233. movdqa xmm13, xmm7
  5234. pslld xmm11, 31
  5235. pslld xmm12, 30
  5236. pslld xmm13, 25
  5237. pxor xmm11, xmm12
  5238. pxor xmm11, xmm13
  ; xmm12 keeps the part shifted down by 4 bytes for step 2; the part
  ; shifted up by 12 bytes is folded back into the low half.
  5239. movdqa xmm12, xmm11
  5240. psrldq xmm12, 4
  5241. pslldq xmm11, 12
  5242. pxor xmm7, xmm11
  ; Step 2: per-dword right shifts of the updated low half by 1, 2 and 7,
  ; combined with xmm12 and the low half itself, then XORed into the high
  ; half. xmm4 = reduced 128-bit result.
  5243. movdqa xmm13, xmm7
  5244. movdqa xmm9, xmm7
  5245. movdqa xmm8, xmm7
  5246. psrld xmm13, 1
  5247. psrld xmm9, 2
  5248. psrld xmm8, 7
  5249. pxor xmm13, xmm9
  5250. pxor xmm13, xmm8
  5251. pxor xmm13, xmm12
  5252. pxor xmm13, xmm7
  5253. pxor xmm4, xmm13
  ; Reorder the result bytes with the shuffle mask L_aes_gcm_bswap_mask
  ; (constant defined elsewhere; by its name a byte-order reversal), then
  ; XOR with the arg7 block: xmm0 = final tag value.
  5254. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  5255. movdqu xmm0, xmm6
  5256. pxor xmm0, xmm4
  ; Fast path: a full 16-byte tag is written with one unaligned store.
  5257. cmp r8d, 16
  5258. je L_AES_GCM_encrypt_final_aesni_store_tag_16
  ; Partial tag: spill the 16 bytes to the scratch slot at [rsp] and copy
  ; the first r8d bytes one at a time. rcx is the byte index; r13 is the
  ; byte being moved.
  5259. xor rcx, rcx
  5260. movdqu [rsp], xmm0
  5261. L_AES_GCM_encrypt_final_aesni_store_tag_loop:
  5262. movzx r13d, BYTE PTR [rsp+rcx]
  5263. mov BYTE PTR [r9+rcx], r13b
  5264. inc ecx
  ; The exit test runs after the copy, so at least one byte is always
  ; written (see the NOTE(review) in the header about r8d == 0 or > 16).
  5265. cmp ecx, r8d
  5266. jne L_AES_GCM_encrypt_final_aesni_store_tag_loop
  5267. jmp L_AES_GCM_encrypt_final_aesni_store_tag_done
  5268. L_AES_GCM_encrypt_final_aesni_store_tag_16:
  5269. movdqu [r9], xmm0
  5270. L_AES_GCM_encrypt_final_aesni_store_tag_done:
  ; Epilogue: restore xmm6-xmm13, release the frame and pop the saved
  ; registers in the reverse of the push order.
  5271. movdqu xmm6, [rsp+16]
  5272. movdqu xmm7, [rsp+32]
  5273. movdqu xmm8, [rsp+48]
  5274. movdqu xmm9, [rsp+64]
  5275. movdqu xmm10, [rsp+80]
  5276. movdqu xmm11, [rsp+96]
  5277. movdqu xmm12, [rsp+112]
  5278. movdqu xmm13, [rsp+128]
  5279. add rsp, 144
  5280. pop r14
  5281. pop r12
  5282. pop r13
  5283. ret
  5284. AES_GCM_encrypt_final_aesni ENDP
  5285. _text ENDS
  5286. _text SEGMENT READONLY PARA
  5287. AES_GCM_decrypt_update_aesni PROC
  5288. push r13
  5289. push r12
  5290. push r14
  5291. push r15
  5292. push rdi
  5293. push rsi
  5294. mov rax, rcx
  5295. mov r10, r8
  5296. mov r8d, edx
  5297. mov r11, r9
  5298. mov r9d, DWORD PTR [rsp+88]
  5299. mov r12, QWORD PTR [rsp+96]
  5300. mov r14, QWORD PTR [rsp+104]
  5301. mov r15, QWORD PTR [rsp+112]
  5302. sub rsp, 328
  5303. movdqu [rsp+168], xmm6
  5304. movdqu [rsp+184], xmm7
  5305. movdqu [rsp+200], xmm8
  5306. movdqu [rsp+216], xmm9
  5307. movdqu [rsp+232], xmm10
  5308. movdqu [rsp+248], xmm11
  5309. movdqu [rsp+264], xmm12
  5310. movdqu [rsp+280], xmm13
  5311. movdqu [rsp+296], xmm14
  5312. movdqu [rsp+312], xmm15
  5313. movdqa xmm6, OWORD PTR [r12]
  5314. movdqa xmm5, OWORD PTR [r14]
  5315. movdqa xmm9, xmm5
  5316. movdqa xmm8, xmm5
  5317. psrlq xmm9, 63
  5318. psllq xmm8, 1
  5319. pslldq xmm9, 8
  5320. por xmm8, xmm9
  5321. pshufd xmm5, xmm5, 255
  5322. psrad xmm5, 31
  5323. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  5324. pxor xmm5, xmm8
  5325. xor edi, edi
  5326. cmp r9d, 128
  5327. mov r13d, r9d
  5328. jl L_AES_GCM_decrypt_update_aesni_done_128
  5329. and r13d, 4294967168
  5330. movdqa xmm2, xmm6
  5331. ; H ^ 1
  5332. movdqu [rsp], xmm5
  5333. ; H ^ 2
  5334. pshufd xmm9, xmm5, 78
  5335. pshufd xmm10, xmm5, 78
  5336. movdqa xmm11, xmm5
  5337. movdqa xmm8, xmm5
  5338. pclmulqdq xmm11, xmm5, 17
  5339. pclmulqdq xmm8, xmm5, 0
  5340. pxor xmm9, xmm5
  5341. pxor xmm10, xmm5
  5342. pclmulqdq xmm9, xmm10, 0
  5343. pxor xmm9, xmm8
  5344. pxor xmm9, xmm11
  5345. movdqa xmm10, xmm9
  5346. movdqa xmm0, xmm11
  5347. pslldq xmm10, 8
  5348. psrldq xmm9, 8
  5349. pxor xmm8, xmm10
  5350. pxor xmm0, xmm9
  5351. movdqa xmm12, xmm8
  5352. movdqa xmm13, xmm8
  5353. movdqa xmm14, xmm8
  5354. pslld xmm12, 31
  5355. pslld xmm13, 30
  5356. pslld xmm14, 25
  5357. pxor xmm12, xmm13
  5358. pxor xmm12, xmm14
  5359. movdqa xmm13, xmm12
  5360. psrldq xmm13, 4
  5361. pslldq xmm12, 12
  5362. pxor xmm8, xmm12
  5363. movdqa xmm14, xmm8
  5364. movdqa xmm10, xmm8
  5365. movdqa xmm9, xmm8
  5366. psrld xmm14, 1
  5367. psrld xmm10, 2
  5368. psrld xmm9, 7
  5369. pxor xmm14, xmm10
  5370. pxor xmm14, xmm9
  5371. pxor xmm14, xmm13
  5372. pxor xmm14, xmm8
  5373. pxor xmm0, xmm14
  5374. movdqu [rsp+16], xmm0
  5375. ; H ^ 3
  5376. pshufd xmm9, xmm5, 78
  5377. pshufd xmm10, xmm0, 78
  5378. movdqa xmm11, xmm0
  5379. movdqa xmm8, xmm0
  5380. pclmulqdq xmm11, xmm5, 17
  5381. pclmulqdq xmm8, xmm5, 0
  5382. pxor xmm9, xmm5
  5383. pxor xmm10, xmm0
  5384. pclmulqdq xmm9, xmm10, 0
  5385. pxor xmm9, xmm8
  5386. pxor xmm9, xmm11
  5387. movdqa xmm10, xmm9
  5388. movdqa xmm1, xmm11
  5389. pslldq xmm10, 8
  5390. psrldq xmm9, 8
  5391. pxor xmm8, xmm10
  5392. pxor xmm1, xmm9
  5393. movdqa xmm12, xmm8
  5394. movdqa xmm13, xmm8
  5395. movdqa xmm14, xmm8
  5396. pslld xmm12, 31
  5397. pslld xmm13, 30
  5398. pslld xmm14, 25
  5399. pxor xmm12, xmm13
  5400. pxor xmm12, xmm14
  5401. movdqa xmm13, xmm12
  5402. psrldq xmm13, 4
  5403. pslldq xmm12, 12
  5404. pxor xmm8, xmm12
  5405. movdqa xmm14, xmm8
  5406. movdqa xmm10, xmm8
  5407. movdqa xmm9, xmm8
  5408. psrld xmm14, 1
  5409. psrld xmm10, 2
  5410. psrld xmm9, 7
  5411. pxor xmm14, xmm10
  5412. pxor xmm14, xmm9
  5413. pxor xmm14, xmm13
  5414. pxor xmm14, xmm8
  5415. pxor xmm1, xmm14
  5416. movdqu [rsp+32], xmm1
  5417. ; H ^ 4
  5418. pshufd xmm9, xmm0, 78
  5419. pshufd xmm10, xmm0, 78
  5420. movdqa xmm11, xmm0
  5421. movdqa xmm8, xmm0
  5422. pclmulqdq xmm11, xmm0, 17
  5423. pclmulqdq xmm8, xmm0, 0
  5424. pxor xmm9, xmm0
  5425. pxor xmm10, xmm0
  5426. pclmulqdq xmm9, xmm10, 0
  5427. pxor xmm9, xmm8
  5428. pxor xmm9, xmm11
  5429. movdqa xmm10, xmm9
  5430. movdqa xmm3, xmm11
  5431. pslldq xmm10, 8
  5432. psrldq xmm9, 8
  5433. pxor xmm8, xmm10
  5434. pxor xmm3, xmm9
  5435. movdqa xmm12, xmm8
  5436. movdqa xmm13, xmm8
  5437. movdqa xmm14, xmm8
  5438. pslld xmm12, 31
  5439. pslld xmm13, 30
  5440. pslld xmm14, 25
  5441. pxor xmm12, xmm13
  5442. pxor xmm12, xmm14
  5443. movdqa xmm13, xmm12
  5444. psrldq xmm13, 4
  5445. pslldq xmm12, 12
  5446. pxor xmm8, xmm12
  5447. movdqa xmm14, xmm8
  5448. movdqa xmm10, xmm8
  5449. movdqa xmm9, xmm8
  5450. psrld xmm14, 1
  5451. psrld xmm10, 2
  5452. psrld xmm9, 7
  5453. pxor xmm14, xmm10
  5454. pxor xmm14, xmm9
  5455. pxor xmm14, xmm13
  5456. pxor xmm14, xmm8
  5457. pxor xmm3, xmm14
  5458. movdqu [rsp+48], xmm3
  5459. ; H ^ 5
  5460. pshufd xmm9, xmm0, 78
  5461. pshufd xmm10, xmm1, 78
  5462. movdqa xmm11, xmm1
  5463. movdqa xmm8, xmm1
  5464. pclmulqdq xmm11, xmm0, 17
  5465. pclmulqdq xmm8, xmm0, 0
  5466. pxor xmm9, xmm0
  5467. pxor xmm10, xmm1
  5468. pclmulqdq xmm9, xmm10, 0
  5469. pxor xmm9, xmm8
  5470. pxor xmm9, xmm11
  5471. movdqa xmm10, xmm9
  5472. movdqa xmm7, xmm11
  5473. pslldq xmm10, 8
  5474. psrldq xmm9, 8
  5475. pxor xmm8, xmm10
  5476. pxor xmm7, xmm9
  5477. movdqa xmm12, xmm8
  5478. movdqa xmm13, xmm8
  5479. movdqa xmm14, xmm8
  5480. pslld xmm12, 31
  5481. pslld xmm13, 30
  5482. pslld xmm14, 25
  5483. pxor xmm12, xmm13
  5484. pxor xmm12, xmm14
  5485. movdqa xmm13, xmm12
  5486. psrldq xmm13, 4
  5487. pslldq xmm12, 12
  5488. pxor xmm8, xmm12
  5489. movdqa xmm14, xmm8
  5490. movdqa xmm10, xmm8
  5491. movdqa xmm9, xmm8
  5492. psrld xmm14, 1
  5493. psrld xmm10, 2
  5494. psrld xmm9, 7
  5495. pxor xmm14, xmm10
  5496. pxor xmm14, xmm9
  5497. pxor xmm14, xmm13
  5498. pxor xmm14, xmm8
  5499. pxor xmm7, xmm14
  5500. movdqu [rsp+64], xmm7
  5501. ; H ^ 6
  5502. pshufd xmm9, xmm1, 78
  5503. pshufd xmm10, xmm1, 78
  5504. movdqa xmm11, xmm1
  5505. movdqa xmm8, xmm1
  5506. pclmulqdq xmm11, xmm1, 17
  5507. pclmulqdq xmm8, xmm1, 0
  5508. pxor xmm9, xmm1
  5509. pxor xmm10, xmm1
  5510. pclmulqdq xmm9, xmm10, 0
  5511. pxor xmm9, xmm8
  5512. pxor xmm9, xmm11
  5513. movdqa xmm10, xmm9
  5514. movdqa xmm7, xmm11
  5515. pslldq xmm10, 8
  5516. psrldq xmm9, 8
  5517. pxor xmm8, xmm10
  5518. pxor xmm7, xmm9
  5519. movdqa xmm12, xmm8
  5520. movdqa xmm13, xmm8
  5521. movdqa xmm14, xmm8
  5522. pslld xmm12, 31
  5523. pslld xmm13, 30
  5524. pslld xmm14, 25
  5525. pxor xmm12, xmm13
  5526. pxor xmm12, xmm14
  5527. movdqa xmm13, xmm12
  5528. psrldq xmm13, 4
  5529. pslldq xmm12, 12
  5530. pxor xmm8, xmm12
  5531. movdqa xmm14, xmm8
  5532. movdqa xmm10, xmm8
  5533. movdqa xmm9, xmm8
  5534. psrld xmm14, 1
  5535. psrld xmm10, 2
  5536. psrld xmm9, 7
  5537. pxor xmm14, xmm10
  5538. pxor xmm14, xmm9
  5539. pxor xmm14, xmm13
  5540. pxor xmm14, xmm8
  5541. pxor xmm7, xmm14
  5542. movdqu [rsp+80], xmm7
  5543. ; H ^ 7
  5544. pshufd xmm9, xmm1, 78
  5545. pshufd xmm10, xmm3, 78
  5546. movdqa xmm11, xmm3
  5547. movdqa xmm8, xmm3
  5548. pclmulqdq xmm11, xmm1, 17
  5549. pclmulqdq xmm8, xmm1, 0
  5550. pxor xmm9, xmm1
  5551. pxor xmm10, xmm3
  5552. pclmulqdq xmm9, xmm10, 0
  5553. pxor xmm9, xmm8
  5554. pxor xmm9, xmm11
  5555. movdqa xmm10, xmm9
  5556. movdqa xmm7, xmm11
  5557. pslldq xmm10, 8
  5558. psrldq xmm9, 8
  5559. pxor xmm8, xmm10
  5560. pxor xmm7, xmm9
  5561. movdqa xmm12, xmm8
  5562. movdqa xmm13, xmm8
  5563. movdqa xmm14, xmm8
  5564. pslld xmm12, 31
  5565. pslld xmm13, 30
  5566. pslld xmm14, 25
  5567. pxor xmm12, xmm13
  5568. pxor xmm12, xmm14
  5569. movdqa xmm13, xmm12
  5570. psrldq xmm13, 4
  5571. pslldq xmm12, 12
  5572. pxor xmm8, xmm12
  5573. movdqa xmm14, xmm8
  5574. movdqa xmm10, xmm8
  5575. movdqa xmm9, xmm8
  5576. psrld xmm14, 1
  5577. psrld xmm10, 2
  5578. psrld xmm9, 7
  5579. pxor xmm14, xmm10
  5580. pxor xmm14, xmm9
  5581. pxor xmm14, xmm13
  5582. pxor xmm14, xmm8
  5583. pxor xmm7, xmm14
  5584. movdqu [rsp+96], xmm7
  5585. ; H ^ 8
  5586. pshufd xmm9, xmm3, 78
  5587. pshufd xmm10, xmm3, 78
  5588. movdqa xmm11, xmm3
  5589. movdqa xmm8, xmm3
  5590. pclmulqdq xmm11, xmm3, 17
  5591. pclmulqdq xmm8, xmm3, 0
  5592. pxor xmm9, xmm3
  5593. pxor xmm10, xmm3
  5594. pclmulqdq xmm9, xmm10, 0
  5595. pxor xmm9, xmm8
  5596. pxor xmm9, xmm11
  5597. movdqa xmm10, xmm9
  5598. movdqa xmm7, xmm11
  5599. pslldq xmm10, 8
  5600. psrldq xmm9, 8
  5601. pxor xmm8, xmm10
  5602. pxor xmm7, xmm9
  5603. movdqa xmm12, xmm8
  5604. movdqa xmm13, xmm8
  5605. movdqa xmm14, xmm8
  5606. pslld xmm12, 31
  5607. pslld xmm13, 30
  5608. pslld xmm14, 25
  5609. pxor xmm12, xmm13
  5610. pxor xmm12, xmm14
  5611. movdqa xmm13, xmm12
  5612. psrldq xmm13, 4
  5613. pslldq xmm12, 12
  5614. pxor xmm8, xmm12
  5615. movdqa xmm14, xmm8
  5616. movdqa xmm10, xmm8
  5617. movdqa xmm9, xmm8
  5618. psrld xmm14, 1
  5619. psrld xmm10, 2
  5620. psrld xmm9, 7
  5621. pxor xmm14, xmm10
  5622. pxor xmm14, xmm9
  5623. pxor xmm14, xmm13
  5624. pxor xmm14, xmm8
  5625. pxor xmm7, xmm14
  5626. movdqu [rsp+112], xmm7
  5627. L_AES_GCM_decrypt_update_aesni_ghash_128:
  5628. lea rcx, QWORD PTR [r11+rdi]
  5629. lea rdx, QWORD PTR [r10+rdi]
  5630. movdqu xmm8, [r15]
  5631. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  5632. movdqa xmm0, xmm8
  5633. pshufb xmm8, xmm1
  5634. movdqa xmm9, xmm0
  5635. paddd xmm9, OWORD PTR L_aes_gcm_one
  5636. pshufb xmm9, xmm1
  5637. movdqa xmm10, xmm0
  5638. paddd xmm10, OWORD PTR L_aes_gcm_two
  5639. pshufb xmm10, xmm1
  5640. movdqa xmm11, xmm0
  5641. paddd xmm11, OWORD PTR L_aes_gcm_three
  5642. pshufb xmm11, xmm1
  5643. movdqa xmm12, xmm0
  5644. paddd xmm12, OWORD PTR L_aes_gcm_four
  5645. pshufb xmm12, xmm1
  5646. movdqa xmm13, xmm0
  5647. paddd xmm13, OWORD PTR L_aes_gcm_five
  5648. pshufb xmm13, xmm1
  5649. movdqa xmm14, xmm0
  5650. paddd xmm14, OWORD PTR L_aes_gcm_six
  5651. pshufb xmm14, xmm1
  5652. movdqa xmm15, xmm0
  5653. paddd xmm15, OWORD PTR L_aes_gcm_seven
  5654. pshufb xmm15, xmm1
  5655. paddd xmm0, OWORD PTR L_aes_gcm_eight
  5656. movdqa xmm7, OWORD PTR [rax]
  5657. movdqu [r15], xmm0
  5658. pxor xmm8, xmm7
  5659. pxor xmm9, xmm7
  5660. pxor xmm10, xmm7
  5661. pxor xmm11, xmm7
  5662. pxor xmm12, xmm7
  5663. pxor xmm13, xmm7
  5664. pxor xmm14, xmm7
  5665. pxor xmm15, xmm7
  5666. movdqu xmm7, [rsp+112]
  5667. movdqu xmm0, [rcx]
  5668. aesenc xmm8, [rax+16]
  5669. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5670. pxor xmm0, xmm2
  5671. pshufd xmm1, xmm7, 78
  5672. pshufd xmm5, xmm0, 78
  5673. pxor xmm1, xmm7
  5674. pxor xmm5, xmm0
  5675. movdqa xmm3, xmm0
  5676. pclmulqdq xmm3, xmm7, 17
  5677. aesenc xmm9, [rax+16]
  5678. aesenc xmm10, [rax+16]
  5679. movdqa xmm2, xmm0
  5680. pclmulqdq xmm2, xmm7, 0
  5681. aesenc xmm11, [rax+16]
  5682. aesenc xmm12, [rax+16]
  5683. pclmulqdq xmm1, xmm5, 0
  5684. aesenc xmm13, [rax+16]
  5685. aesenc xmm14, [rax+16]
  5686. aesenc xmm15, [rax+16]
  5687. pxor xmm1, xmm2
  5688. pxor xmm1, xmm3
  5689. movdqu xmm7, [rsp+96]
  5690. movdqu xmm0, [rcx+16]
  5691. pshufd xmm4, xmm7, 78
  5692. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5693. aesenc xmm8, [rax+32]
  5694. pxor xmm4, xmm7
  5695. pshufd xmm5, xmm0, 78
  5696. pxor xmm5, xmm0
  5697. movdqa xmm6, xmm0
  5698. pclmulqdq xmm6, xmm7, 17
  5699. aesenc xmm9, [rax+32]
  5700. aesenc xmm10, [rax+32]
  5701. pclmulqdq xmm7, xmm0, 0
  5702. aesenc xmm11, [rax+32]
  5703. aesenc xmm12, [rax+32]
  5704. pclmulqdq xmm4, xmm5, 0
  5705. aesenc xmm13, [rax+32]
  5706. aesenc xmm14, [rax+32]
  5707. aesenc xmm15, [rax+32]
  5708. pxor xmm1, xmm7
  5709. pxor xmm2, xmm7
  5710. pxor xmm1, xmm6
  5711. pxor xmm3, xmm6
  5712. pxor xmm1, xmm4
  5713. movdqu xmm7, [rsp+80]
  5714. movdqu xmm0, [rcx+32]
  5715. pshufd xmm4, xmm7, 78
  5716. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5717. aesenc xmm8, [rax+48]
  5718. pxor xmm4, xmm7
  5719. pshufd xmm5, xmm0, 78
  5720. pxor xmm5, xmm0
  5721. movdqa xmm6, xmm0
  5722. pclmulqdq xmm6, xmm7, 17
  5723. aesenc xmm9, [rax+48]
  5724. aesenc xmm10, [rax+48]
  5725. pclmulqdq xmm7, xmm0, 0
  5726. aesenc xmm11, [rax+48]
  5727. aesenc xmm12, [rax+48]
  5728. pclmulqdq xmm4, xmm5, 0
  5729. aesenc xmm13, [rax+48]
  5730. aesenc xmm14, [rax+48]
  5731. aesenc xmm15, [rax+48]
  5732. pxor xmm1, xmm7
  5733. pxor xmm2, xmm7
  5734. pxor xmm1, xmm6
  5735. pxor xmm3, xmm6
  5736. pxor xmm1, xmm4
  5737. movdqu xmm7, [rsp+64]
  5738. movdqu xmm0, [rcx+48]
  5739. pshufd xmm4, xmm7, 78
  5740. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5741. aesenc xmm8, [rax+64]
  5742. pxor xmm4, xmm7
  5743. pshufd xmm5, xmm0, 78
  5744. pxor xmm5, xmm0
  5745. movdqa xmm6, xmm0
  5746. pclmulqdq xmm6, xmm7, 17
  5747. aesenc xmm9, [rax+64]
  5748. aesenc xmm10, [rax+64]
  5749. pclmulqdq xmm7, xmm0, 0
  5750. aesenc xmm11, [rax+64]
  5751. aesenc xmm12, [rax+64]
  5752. pclmulqdq xmm4, xmm5, 0
  5753. aesenc xmm13, [rax+64]
  5754. aesenc xmm14, [rax+64]
  5755. aesenc xmm15, [rax+64]
  5756. pxor xmm1, xmm7
  5757. pxor xmm2, xmm7
  5758. pxor xmm1, xmm6
  5759. pxor xmm3, xmm6
  5760. pxor xmm1, xmm4
  5761. movdqu xmm7, [rsp+48]
  5762. movdqu xmm0, [rcx+64]
  5763. pshufd xmm4, xmm7, 78
  5764. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5765. aesenc xmm8, [rax+80]
  5766. pxor xmm4, xmm7
  5767. pshufd xmm5, xmm0, 78
  5768. pxor xmm5, xmm0
  5769. movdqa xmm6, xmm0
  5770. pclmulqdq xmm6, xmm7, 17
  5771. aesenc xmm9, [rax+80]
  5772. aesenc xmm10, [rax+80]
  5773. pclmulqdq xmm7, xmm0, 0
  5774. aesenc xmm11, [rax+80]
  5775. aesenc xmm12, [rax+80]
  5776. pclmulqdq xmm4, xmm5, 0
  5777. aesenc xmm13, [rax+80]
  5778. aesenc xmm14, [rax+80]
  5779. aesenc xmm15, [rax+80]
  5780. pxor xmm1, xmm7
  5781. pxor xmm2, xmm7
  5782. pxor xmm1, xmm6
  5783. pxor xmm3, xmm6
  5784. pxor xmm1, xmm4
  5785. movdqu xmm7, [rsp+32]
  5786. movdqu xmm0, [rcx+80]
  5787. pshufd xmm4, xmm7, 78
  5788. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5789. aesenc xmm8, [rax+96]
  5790. pxor xmm4, xmm7
  5791. pshufd xmm5, xmm0, 78
  5792. pxor xmm5, xmm0
  5793. movdqa xmm6, xmm0
  5794. pclmulqdq xmm6, xmm7, 17
  5795. aesenc xmm9, [rax+96]
  5796. aesenc xmm10, [rax+96]
  5797. pclmulqdq xmm7, xmm0, 0
  5798. aesenc xmm11, [rax+96]
  5799. aesenc xmm12, [rax+96]
  5800. pclmulqdq xmm4, xmm5, 0
  5801. aesenc xmm13, [rax+96]
  5802. aesenc xmm14, [rax+96]
  5803. aesenc xmm15, [rax+96]
  5804. pxor xmm1, xmm7
  5805. pxor xmm2, xmm7
  5806. pxor xmm1, xmm6
  5807. pxor xmm3, xmm6
  5808. pxor xmm1, xmm4
  5809. movdqu xmm7, [rsp+16]
  5810. movdqu xmm0, [rcx+96]
  5811. pshufd xmm4, xmm7, 78
  5812. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5813. aesenc xmm8, [rax+112]
  5814. pxor xmm4, xmm7
  5815. pshufd xmm5, xmm0, 78
  5816. pxor xmm5, xmm0
  5817. movdqa xmm6, xmm0
  5818. pclmulqdq xmm6, xmm7, 17
  5819. aesenc xmm9, [rax+112]
  5820. aesenc xmm10, [rax+112]
  5821. pclmulqdq xmm7, xmm0, 0
  5822. aesenc xmm11, [rax+112]
  5823. aesenc xmm12, [rax+112]
  5824. pclmulqdq xmm4, xmm5, 0
  5825. aesenc xmm13, [rax+112]
  5826. aesenc xmm14, [rax+112]
  5827. aesenc xmm15, [rax+112]
  5828. pxor xmm1, xmm7
  5829. pxor xmm2, xmm7
  5830. pxor xmm1, xmm6
  5831. pxor xmm3, xmm6
  5832. pxor xmm1, xmm4
  5833. movdqu xmm7, [rsp]
  5834. movdqu xmm0, [rcx+112]
  5835. pshufd xmm4, xmm7, 78
  5836. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5837. aesenc xmm8, [rax+128]
  5838. pxor xmm4, xmm7
  5839. pshufd xmm5, xmm0, 78
  5840. pxor xmm5, xmm0
  5841. movdqa xmm6, xmm0
  5842. pclmulqdq xmm6, xmm7, 17
  5843. aesenc xmm9, [rax+128]
  5844. aesenc xmm10, [rax+128]
  5845. pclmulqdq xmm7, xmm0, 0
  5846. aesenc xmm11, [rax+128]
  5847. aesenc xmm12, [rax+128]
  5848. pclmulqdq xmm4, xmm5, 0
  5849. aesenc xmm13, [rax+128]
  5850. aesenc xmm14, [rax+128]
  5851. aesenc xmm15, [rax+128]
  5852. pxor xmm1, xmm7
  5853. pxor xmm2, xmm7
  5854. pxor xmm1, xmm6
  5855. pxor xmm3, xmm6
  5856. pxor xmm1, xmm4
  5857. movdqa xmm5, xmm1
  5858. psrldq xmm1, 8
  5859. pslldq xmm5, 8
  5860. aesenc xmm8, [rax+144]
  5861. pxor xmm2, xmm5
  5862. pxor xmm3, xmm1
  5863. movdqa xmm7, xmm2
  5864. movdqa xmm4, xmm2
  5865. movdqa xmm5, xmm2
  5866. aesenc xmm9, [rax+144]
  5867. pslld xmm7, 31
  5868. pslld xmm4, 30
  5869. pslld xmm5, 25
  5870. aesenc xmm10, [rax+144]
  5871. pxor xmm7, xmm4
  5872. pxor xmm7, xmm5
  5873. aesenc xmm11, [rax+144]
  5874. movdqa xmm4, xmm7
  5875. pslldq xmm7, 12
  5876. psrldq xmm4, 4
  5877. aesenc xmm12, [rax+144]
  5878. pxor xmm2, xmm7
  5879. movdqa xmm5, xmm2
  5880. movdqa xmm1, xmm2
  5881. movdqa xmm0, xmm2
  5882. aesenc xmm13, [rax+144]
  5883. psrld xmm5, 1
  5884. psrld xmm1, 2
  5885. psrld xmm0, 7
  5886. aesenc xmm14, [rax+144]
  5887. pxor xmm5, xmm1
  5888. pxor xmm5, xmm0
  5889. aesenc xmm15, [rax+144]
  5890. pxor xmm5, xmm4
  5891. pxor xmm2, xmm5
  5892. pxor xmm2, xmm3
  5893. cmp r8d, 11
  5894. movdqa xmm7, OWORD PTR [rax+160]
  5895. jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done
  5896. aesenc xmm8, xmm7
  5897. aesenc xmm9, xmm7
  5898. aesenc xmm10, xmm7
  5899. aesenc xmm11, xmm7
  5900. aesenc xmm12, xmm7
  5901. aesenc xmm13, xmm7
  5902. aesenc xmm14, xmm7
  5903. aesenc xmm15, xmm7
  5904. movdqa xmm7, OWORD PTR [rax+176]
  5905. aesenc xmm8, xmm7
  5906. aesenc xmm9, xmm7
  5907. aesenc xmm10, xmm7
  5908. aesenc xmm11, xmm7
  5909. aesenc xmm12, xmm7
  5910. aesenc xmm13, xmm7
  5911. aesenc xmm14, xmm7
  5912. aesenc xmm15, xmm7
  5913. cmp r8d, 13
  5914. movdqa xmm7, OWORD PTR [rax+192]
  5915. jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done
  5916. aesenc xmm8, xmm7
  5917. aesenc xmm9, xmm7
  5918. aesenc xmm10, xmm7
  5919. aesenc xmm11, xmm7
  5920. aesenc xmm12, xmm7
  5921. aesenc xmm13, xmm7
  5922. aesenc xmm14, xmm7
  5923. aesenc xmm15, xmm7
  5924. movdqa xmm7, OWORD PTR [rax+208]
  5925. aesenc xmm8, xmm7
  5926. aesenc xmm9, xmm7
  5927. aesenc xmm10, xmm7
  5928. aesenc xmm11, xmm7
  5929. aesenc xmm12, xmm7
  5930. aesenc xmm13, xmm7
  5931. aesenc xmm14, xmm7
  5932. aesenc xmm15, xmm7
  5933. movdqa xmm7, OWORD PTR [rax+224]
  5934. L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done:
  5935. aesenclast xmm8, xmm7
  5936. aesenclast xmm9, xmm7
  5937. movdqu xmm0, [rcx]
  5938. movdqu xmm1, [rcx+16]
  5939. pxor xmm8, xmm0
  5940. pxor xmm9, xmm1
  5941. movdqu [rdx], xmm8
  5942. movdqu [rdx+16], xmm9
  5943. aesenclast xmm10, xmm7
  5944. aesenclast xmm11, xmm7
  5945. movdqu xmm0, [rcx+32]
  5946. movdqu xmm1, [rcx+48]
  5947. pxor xmm10, xmm0
  5948. pxor xmm11, xmm1
  5949. movdqu [rdx+32], xmm10
  5950. movdqu [rdx+48], xmm11
  5951. aesenclast xmm12, xmm7
  5952. aesenclast xmm13, xmm7
  5953. movdqu xmm0, [rcx+64]
  5954. movdqu xmm1, [rcx+80]
  5955. pxor xmm12, xmm0
  5956. pxor xmm13, xmm1
  5957. movdqu [rdx+64], xmm12
  5958. movdqu [rdx+80], xmm13
  5959. aesenclast xmm14, xmm7
  5960. aesenclast xmm15, xmm7
  5961. movdqu xmm0, [rcx+96]
  5962. movdqu xmm1, [rcx+112]
  5963. pxor xmm14, xmm0
  5964. pxor xmm15, xmm1
  5965. movdqu [rdx+96], xmm14
  5966. movdqu [rdx+112], xmm15
  5967. add edi, 128
  5968. cmp edi, r13d
  5969. jl L_AES_GCM_decrypt_update_aesni_ghash_128
  5970. movdqa xmm6, xmm2
  5971. movdqu xmm5, [rsp]
  5972. L_AES_GCM_decrypt_update_aesni_done_128:
  5973. mov edx, r9d
  5974. cmp edi, edx
  5975. jge L_AES_GCM_decrypt_update_aesni_done_dec
  5976. mov r13d, r9d
  5977. and r13d, 4294967280
  5978. cmp edi, r13d
  5979. jge L_AES_GCM_decrypt_update_aesni_last_block_done
  5980. L_AES_GCM_decrypt_update_aesni_last_block_start:
  5981. lea rcx, QWORD PTR [r11+rdi]
  5982. lea rdx, QWORD PTR [r10+rdi]
  5983. movdqu xmm1, [rcx]
  5984. movdqa xmm0, xmm5
  5985. pshufb xmm1, OWORD PTR L_aes_gcm_bswap_mask
  5986. pxor xmm1, xmm6
  5987. movdqu xmm8, [r15]
  5988. movdqa xmm9, xmm8
  5989. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  5990. paddd xmm9, OWORD PTR L_aes_gcm_one
  5991. pxor xmm8, [rax]
  5992. movdqu [r15], xmm9
  5993. movdqa xmm10, xmm1
  5994. pclmulqdq xmm10, xmm0, 16
  5995. aesenc xmm8, [rax+16]
  5996. aesenc xmm8, [rax+32]
  5997. movdqa xmm11, xmm1
  5998. pclmulqdq xmm11, xmm0, 1
  5999. aesenc xmm8, [rax+48]
  6000. aesenc xmm8, [rax+64]
  6001. movdqa xmm12, xmm1
  6002. pclmulqdq xmm12, xmm0, 0
  6003. aesenc xmm8, [rax+80]
  6004. movdqa xmm1, xmm1
  6005. pclmulqdq xmm1, xmm0, 17
  6006. aesenc xmm8, [rax+96]
  6007. pxor xmm10, xmm11
  6008. movdqa xmm2, xmm10
  6009. psrldq xmm10, 8
  6010. pslldq xmm2, 8
  6011. aesenc xmm8, [rax+112]
  6012. movdqa xmm3, xmm1
  6013. pxor xmm2, xmm12
  6014. pxor xmm3, xmm10
  6015. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  6016. movdqa xmm11, xmm2
  6017. pclmulqdq xmm11, xmm0, 16
  6018. aesenc xmm8, [rax+128]
  6019. pshufd xmm10, xmm2, 78
  6020. pxor xmm10, xmm11
  6021. movdqa xmm11, xmm10
  6022. pclmulqdq xmm11, xmm0, 16
  6023. aesenc xmm8, [rax+144]
  6024. pshufd xmm6, xmm10, 78
  6025. pxor xmm6, xmm11
  6026. pxor xmm6, xmm3
  6027. cmp r8d, 11
  6028. movdqa xmm9, OWORD PTR [rax+160]
  6029. jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last
  6030. aesenc xmm8, xmm9
  6031. aesenc xmm8, [rax+176]
  6032. cmp r8d, 13
  6033. movdqa xmm9, OWORD PTR [rax+192]
  6034. jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last
  6035. aesenc xmm8, xmm9
  6036. aesenc xmm8, [rax+208]
  6037. movdqa xmm9, OWORD PTR [rax+224]
  6038. L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last:
  6039. aesenclast xmm8, xmm9
  6040. movdqu xmm9, [rcx]
  6041. pxor xmm8, xmm9
  6042. movdqu [rdx], xmm8
  6043. add edi, 16
  6044. cmp edi, r13d
  6045. jl L_AES_GCM_decrypt_update_aesni_last_block_start
  6046. L_AES_GCM_decrypt_update_aesni_last_block_done:
  6047. L_AES_GCM_decrypt_update_aesni_done_dec:
  6048. movdqa OWORD PTR [r12], xmm6
  6049. movdqu xmm6, [rsp+168]
  6050. movdqu xmm7, [rsp+184]
  6051. movdqu xmm8, [rsp+200]
  6052. movdqu xmm9, [rsp+216]
  6053. movdqu xmm10, [rsp+232]
  6054. movdqu xmm11, [rsp+248]
  6055. movdqu xmm12, [rsp+264]
  6056. movdqu xmm13, [rsp+280]
  6057. movdqu xmm14, [rsp+296]
  6058. movdqu xmm15, [rsp+312]
  6059. add rsp, 328
  6060. pop rsi
  6061. pop rdi
  6062. pop r15
  6063. pop r14
  6064. pop r12
  6065. pop r13
  6066. ret
  6067. AES_GCM_decrypt_update_aesni ENDP
  6068. _text ENDS
  6069. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_decrypt_final_aesni
  ;
  ; Final step of a GCM decryption: XORs a two-qword length block into a
  ; 16-byte hash state, multiplies it by a (pre-shifted) 128-bit key in
  ; GF(2^128), XORs the byte-shuffled product with a 16-byte mask and
  ; compares the result against a caller-supplied tag.  Stores 1 (equal)
  ; or 0 (different) into the DWORD addressed by the 8th argument.
  ;
  ; Arguments arrive in rcx, rdx, r8, r9 and on the stack, and xmm6+ are
  ; saved/restored, which is consistent with the Microsoft x64 convention:
  ;   rcx        -> 16-byte block, loaded into xmm6
  ;                 (presumably the running GHASH state - confirm)
  ;   rdx        -> tag bytes to compare against
  ;   r8d        =  number of tag bytes to compare
  ;   r9d        =  byte count, becomes the low qword of the length block
  ;                 (presumably the ciphertext length - confirm)
  ;   stack arg5 =  byte count (DWORD), becomes the high qword
  ;                 (presumably the AAD length - confirm)
  ;   stack arg6 -> 16-byte multiplier, loaded into xmm5
  ;                 (presumably the hash key H - confirm)
  ;   stack arg7 -> 16-byte block XORed into the final hash, loaded into
  ;                 xmm15 (presumably the encrypted initial counter)
  ;   stack arg8 -> DWORD that receives the 0/1 result
  ;
  ; The arg1/arg6/arg7 blocks are read with movdqa, so those pointers
  ; must be 16-byte aligned.
  ; Nothing is returned in rax; the only output is the DWORD at arg8.
  ; ---------------------------------------------------------------------
  6070. AES_GCM_decrypt_final_aesni PROC
  ; Save the general-purpose registers this routine overwrites.
  6071. push r13
  6072. push r12
  6073. push r14
  6074. push rbp
  6075. push r15
  6076. mov rax, rcx ; rax = arg1 pointer
  6077. mov r10d, r9d ; r10d = arg4 byte count
  6078. mov r9, rdx ; r9 = arg2 (tag to compare against)
  ; Five pushes (40 bytes) + return address (8) + 32 bytes of shadow space
  ; put the fifth argument at [rsp+80].
  6079. mov r11d, DWORD PTR [rsp+80] ; arg5
  6080. mov r12, QWORD PTR [rsp+88] ; arg6
  6081. mov r14, QWORD PTR [rsp+96] ; arg7
  6082. mov rbp, QWORD PTR [rsp+104] ; arg8 (result pointer)
  ; 160-byte frame: the XMM save slots start at +16; bytes 0..15 are not
  ; written by this routine.  xmm14 is never used here, so it is not saved.
  6083. sub rsp, 160
  6084. movdqu [rsp+16], xmm6
  6085. movdqu [rsp+32], xmm7
  6086. movdqu [rsp+48], xmm8
  6087. movdqu [rsp+64], xmm9
  6088. movdqu [rsp+80], xmm10
  6089. movdqu [rsp+96], xmm11
  6090. movdqu [rsp+112], xmm12
  6091. movdqu [rsp+128], xmm13
  6092. movdqu [rsp+144], xmm15
  6093. movdqa xmm6, OWORD PTR [rax] ; xmm6 = block at arg1
  6094. movdqa xmm5, OWORD PTR [r12] ; xmm5 = block at arg6 (multiplier)
  6095. movdqa xmm15, OWORD PTR [r14] ; xmm15 = block at arg7
  ; xmm7 = xmm5 << 1 as one 128-bit value: each qword is shifted left by
  ; one and the bit shifted out of the low qword is OR-ed into the high one.
  6096. movdqa xmm8, xmm5
  6097. movdqa xmm7, xmm5
  6098. psrlq xmm8, 63
  6099. psllq xmm7, 1
  6100. pslldq xmm8, 8
  6101. por xmm7, xmm8
  ; Broadcast the top dword and sign-extend its top bit into a mask: if
  ; the bit shifted out of the 128-bit value was set, XOR in the
  ; L_aes_gcm_mod2_128 constant.
  6102. pshufd xmm5, xmm5, 255
  6103. psrad xmm5, 31
  6104. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  6105. pxor xmm5, xmm7 ; xmm5 = shifted (and conditionally reduced) multiplier
  ; Build the length block: both byte counts are converted to bit counts
  ; (x8, done in 64 bits) and XORed into the hash state.
  6106. mov edx, r10d
  6107. mov ecx, r11d
  6108. shl rdx, 3
  6109. shl rcx, 3
  6110. pinsrq xmm0, rdx, 0 ; low qword  = arg4 * 8
  6111. pinsrq xmm0, rcx, 1 ; high qword = arg5 * 8
  6112. pxor xmm6, xmm0
  ; 128x128 carry-less multiply of xmm6 by xmm5 using three PCLMULQDQs
  ; (Karatsuba): high*high, low*low and (high^low)*(high^low).
  6113. pshufd xmm8, xmm5, 78 ; 78 swaps the two qwords
  6114. pshufd xmm9, xmm6, 78
  6115. movdqa xmm10, xmm6
  6116. movdqa xmm7, xmm6
  6117. pclmulqdq xmm10, xmm5, 17 ; xmm10 = high qword * high qword
  6118. pclmulqdq xmm7, xmm5, 0 ; xmm7 = low qword * low qword
  6119. pxor xmm8, xmm5
  6120. pxor xmm9, xmm6
  6121. pclmulqdq xmm8, xmm9, 0 ; xmm8 = (hi^lo) * (hi^lo)
  6122. pxor xmm8, xmm7
  6123. pxor xmm8, xmm10 ; xmm8 = middle term
  ; Fold the middle term into the 256-bit product xmm6:xmm7 (high:low).
  6124. movdqa xmm9, xmm8
  6125. movdqa xmm6, xmm10
  6126. pslldq xmm9, 8
  6127. psrldq xmm8, 8
  6128. pxor xmm7, xmm9
  6129. pxor xmm6, xmm8
  ; Reduce the 256-bit product to 128 bits with shift-and-XOR steps
  ; (left shifts by 31/30/25, then right shifts by 1/2/7); presumably this
  ; is the reduction modulo the GHASH polynomial - confirm against the
  ; GCM reference.
  6130. movdqa xmm11, xmm7
  6131. movdqa xmm12, xmm7
  6132. movdqa xmm13, xmm7
  6133. pslld xmm11, 31
  6134. pslld xmm12, 30
  6135. pslld xmm13, 25
  6136. pxor xmm11, xmm12
  6137. pxor xmm11, xmm13
  6138. movdqa xmm12, xmm11
  6139. psrldq xmm12, 4
  6140. pslldq xmm11, 12
  6141. pxor xmm7, xmm11
  6142. movdqa xmm13, xmm7
  6143. movdqa xmm9, xmm7
  6144. movdqa xmm8, xmm7
  6145. psrld xmm13, 1
  6146. psrld xmm9, 2
  6147. psrld xmm8, 7
  6148. pxor xmm13, xmm9
  6149. pxor xmm13, xmm8
  6150. pxor xmm13, xmm12
  6151. pxor xmm13, xmm7
  6152. pxor xmm6, xmm13 ; xmm6 = reduced product
  ; Shuffle bytes with L_aes_gcm_bswap_mask (defined outside this block;
  ; presumably a full 16-byte reversal) and XOR with the arg7 block to
  ; obtain the computed tag in xmm0.
  6153. pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
  6154. movdqu xmm0, xmm15
  6155. pxor xmm0, xmm6
  6156. cmp r8d, 16
  6157. je L_AES_GCM_decrypt_final_aesni_cmp_tag_16
  ; Tag length other than 16: spill the computed tag to a temporary
  ; 16-byte stack slot and compare byte by byte.  Differences are
  ; OR-accumulated in r15b and there is no early exit on a mismatch.
  ; NOTE(review): the loop tests its condition after the first byte and
  ; only stops when ecx == r8d, so it assumes 1 <= r8d <= 15 here - confirm
  ; that callers never pass 0 or a value above 16.
  6158. sub rsp, 16
  6159. xor rcx, rcx ; rcx = byte index
  6160. xor r15, r15 ; r15 = accumulated difference bits
  6161. movdqu [rsp], xmm0
  6162. L_AES_GCM_decrypt_final_aesni_cmp_tag_loop:
  6163. movzx r13d, BYTE PTR [rsp+rcx] ; computed tag byte
  6164. xor r13b, BYTE PTR [r9+rcx] ; XOR with supplied tag byte
  6165. or r15b, r13b
  6166. inc ecx
  6167. cmp ecx, r8d
  6168. jne L_AES_GCM_decrypt_final_aesni_cmp_tag_loop
  ; r15 is 0 only if every byte matched; turn that into 1, anything else
  ; into 0.
  6169. cmp r15, 0
  6170. sete r15b
  6171. add rsp, 16 ; release the temporary slot
  6172. xor rcx, rcx
  6173. jmp L_AES_GCM_decrypt_final_aesni_cmp_tag_done
  6174. L_AES_GCM_decrypt_final_aesni_cmp_tag_16:
  ; Full 16-byte tag: compare all bytes at once.  pcmpeqb yields 0xFF per
  ; equal byte and pmovmskb collects the 16 top bits into edx.
  6175. movdqu xmm1, [r9]
  6176. pcmpeqb xmm0, xmm1
  6177. pmovmskb rdx, xmm0
  6178. ; edx == 0xFFFF (all 16 bytes equal) => result 1, otherwise result 0
  6179. xor r15d, r15d
  6180. cmp edx, 65535
  6181. sete r15b
  6182. L_AES_GCM_decrypt_final_aesni_cmp_tag_done:
  6183. mov DWORD PTR [rbp], r15d ; store the 0/1 result through arg8
  ; Restore the saved XMM registers, drop the frame and pop the
  ; general-purpose registers in reverse push order.
  6184. movdqu xmm6, [rsp+16]
  6185. movdqu xmm7, [rsp+32]
  6186. movdqu xmm8, [rsp+48]
  6187. movdqu xmm9, [rsp+64]
  6188. movdqu xmm10, [rsp+80]
  6189. movdqu xmm11, [rsp+96]
  6190. movdqu xmm12, [rsp+112]
  6191. movdqu xmm13, [rsp+128]
  6192. movdqu xmm15, [rsp+144]
  6193. add rsp, 160
  6194. pop r15
  6195. pop rbp
  6196. pop r14
  6197. pop r12
  6198. pop r13
  6199. ret
  6200. AES_GCM_decrypt_final_aesni ENDP
  6201. _text ENDS
  6202. IFDEF HAVE_INTEL_AVX1
  ; ---------------------------------------------------------------------
  ; 16-byte-aligned constants for the AVX1 AES-GCM routines (assembled
  ; only inside the enclosing IFDEF HAVE_INTEL_AVX1).
  ; Each constant is two QWORDs, low qword first.  Every constant is
  ; followed by a ptr_* QWORD that holds the constant's address; those
  ; pointers are not referenced in the visible part of the file.
  ;
  ; L_avx1_aes_gcm_one .. L_avx1_aes_gcm_eight hold the values 1..8 in
  ; the high qword with a zero low qword.  Presumably they are added to a
  ; counter block with a packed-dword add, the way the SSE code uses
  ; L_aes_gcm_one .. L_aes_gcm_eight - confirm at the use sites.
  ; ---------------------------------------------------------------------
  6203. _DATA SEGMENT
  6204. ALIGN 16
  6205. L_avx1_aes_gcm_one QWORD 0, 1
  6206. ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one
  6207. _DATA ENDS
  6208. _DATA SEGMENT
  6209. ALIGN 16
  6210. L_avx1_aes_gcm_two QWORD 0, 2
  6211. ptr_L_avx1_aes_gcm_two QWORD L_avx1_aes_gcm_two
  6212. _DATA ENDS
  6213. _DATA SEGMENT
  6214. ALIGN 16
  6215. L_avx1_aes_gcm_three QWORD 0, 3
  6216. ptr_L_avx1_aes_gcm_three QWORD L_avx1_aes_gcm_three
  6217. _DATA ENDS
  6218. _DATA SEGMENT
  6219. ALIGN 16
  6220. L_avx1_aes_gcm_four QWORD 0, 4
  6221. ptr_L_avx1_aes_gcm_four QWORD L_avx1_aes_gcm_four
  6222. _DATA ENDS
  6223. _DATA SEGMENT
  6224. ALIGN 16
  6225. L_avx1_aes_gcm_five QWORD 0, 5
  6226. ptr_L_avx1_aes_gcm_five QWORD L_avx1_aes_gcm_five
  6227. _DATA ENDS
  6228. _DATA SEGMENT
  6229. ALIGN 16
  6230. L_avx1_aes_gcm_six QWORD 0, 6
  6231. ptr_L_avx1_aes_gcm_six QWORD L_avx1_aes_gcm_six
  6232. _DATA ENDS
  6233. _DATA SEGMENT
  6234. ALIGN 16
  6235. L_avx1_aes_gcm_seven QWORD 0, 7
  6236. ptr_L_avx1_aes_gcm_seven QWORD L_avx1_aes_gcm_seven
  6237. _DATA ENDS
  6238. _DATA SEGMENT
  6239. ALIGN 16
  6240. L_avx1_aes_gcm_eight QWORD 0, 8
  6241. ptr_L_avx1_aes_gcm_eight QWORD L_avx1_aes_gcm_eight
  6242. _DATA ENDS
  6243. _DATA SEGMENT
  6244. ALIGN 16
  ; Byte-shuffle mask 0x0001020304050607, 0x08090A0B0C0D0E0F: as a
  ; (v)pshufb control it selects bytes 7..0 then 15..8, i.e. it reverses
  ; the bytes inside each 64-bit half without swapping the halves.
  6245. L_avx1_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567
  6246. ptr_L_avx1_aes_gcm_bswap_epi64 QWORD L_avx1_aes_gcm_bswap_epi64
  6247. _DATA ENDS
  6248. _DATA SEGMENT
  6249. ALIGN 16
  ; Byte-shuffle mask 0x08090A0B0C0D0E0F, 0x0001020304050607: as a
  ; (v)pshufb control it selects bytes 15..0, i.e. a full 16-byte
  ; reversal.  AES_GCM_encrypt_avx1 applies it with vpshufb to the hash
  ; key and to incoming IV/AAD blocks.
  6250. L_avx1_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183
  6251. ptr_L_avx1_aes_gcm_bswap_mask QWORD L_avx1_aes_gcm_bswap_mask
  6252. _DATA ENDS
  6253. _DATA SEGMENT
  6254. ALIGN 16
  ; 128-bit value 0xC2000000000000000000000000000001 (low qword 1, high
  ; qword 0xC200000000000000).  Presumably the GHASH reduction constant,
  ; used like its SSE counterpart L_aes_gcm_mod2_128 - confirm at the
  ; use sites.
  6255. L_avx1_aes_gcm_mod2_128 QWORD 1, 13979173243358019584
  6256. ptr_L_avx1_aes_gcm_mod2_128 QWORD L_avx1_aes_gcm_mod2_128
  6257. _DATA ENDS
  6258. _text SEGMENT READONLY PARA
  6259. AES_GCM_encrypt_avx1 PROC
  6260. push r13
  6261. push rdi
  6262. push rsi
  6263. push r12
  6264. push rbx
  6265. push r14
  6266. push r15
  6267. mov rdi, rcx
  6268. mov rsi, rdx
  6269. mov r12, r8
  6270. mov rax, r9
  6271. mov r8, QWORD PTR [rsp+96]
  6272. mov r9d, DWORD PTR [rsp+104]
  6273. mov r11d, DWORD PTR [rsp+112]
  6274. mov ebx, DWORD PTR [rsp+120]
  6275. mov r14d, DWORD PTR [rsp+128]
  6276. mov r15, QWORD PTR [rsp+136]
  6277. mov r10d, DWORD PTR [rsp+144]
  6278. sub rsp, 320
  6279. vmovdqu OWORD PTR [rsp+160], xmm6
  6280. vmovdqu OWORD PTR [rsp+176], xmm7
  6281. vmovdqu OWORD PTR [rsp+192], xmm8
  6282. vmovdqu OWORD PTR [rsp+208], xmm9
  6283. vmovdqu OWORD PTR [rsp+224], xmm10
  6284. vmovdqu OWORD PTR [rsp+240], xmm11
  6285. vmovdqu OWORD PTR [rsp+256], xmm12
  6286. vmovdqu OWORD PTR [rsp+272], xmm13
  6287. vmovdqu OWORD PTR [rsp+288], xmm14
  6288. vmovdqu OWORD PTR [rsp+304], xmm15
  6289. vpxor xmm4, xmm4, xmm4
  6290. vpxor xmm6, xmm6, xmm6
  6291. mov edx, ebx
  6292. cmp edx, 12
  6293. jne L_AES_GCM_encrypt_avx1_iv_not_12
  6294. ; # Calculate values when IV is 12 bytes
  6295. ; Set counter based on IV
  6296. mov ecx, 16777216
  6297. vmovq xmm4, QWORD PTR [rax]
  6298. vpinsrd xmm4, xmm4, DWORD PTR [rax+8], 2
  6299. vpinsrd xmm4, xmm4, ecx, 3
  6300. ; H = Encrypt X(=0) and T = Encrypt counter
  6301. vmovdqa xmm5, OWORD PTR [r15]
  6302. vpxor xmm1, xmm4, xmm5
  6303. vmovdqa xmm7, OWORD PTR [r15+16]
  6304. vaesenc xmm5, xmm5, xmm7
  6305. vaesenc xmm1, xmm1, xmm7
  6306. vmovdqa xmm7, OWORD PTR [r15+32]
  6307. vaesenc xmm5, xmm5, xmm7
  6308. vaesenc xmm1, xmm1, xmm7
  6309. vmovdqa xmm7, OWORD PTR [r15+48]
  6310. vaesenc xmm5, xmm5, xmm7
  6311. vaesenc xmm1, xmm1, xmm7
  6312. vmovdqa xmm7, OWORD PTR [r15+64]
  6313. vaesenc xmm5, xmm5, xmm7
  6314. vaesenc xmm1, xmm1, xmm7
  6315. vmovdqa xmm7, OWORD PTR [r15+80]
  6316. vaesenc xmm5, xmm5, xmm7
  6317. vaesenc xmm1, xmm1, xmm7
  6318. vmovdqa xmm7, OWORD PTR [r15+96]
  6319. vaesenc xmm5, xmm5, xmm7
  6320. vaesenc xmm1, xmm1, xmm7
  6321. vmovdqa xmm7, OWORD PTR [r15+112]
  6322. vaesenc xmm5, xmm5, xmm7
  6323. vaesenc xmm1, xmm1, xmm7
  6324. vmovdqa xmm7, OWORD PTR [r15+128]
  6325. vaesenc xmm5, xmm5, xmm7
  6326. vaesenc xmm1, xmm1, xmm7
  6327. vmovdqa xmm7, OWORD PTR [r15+144]
  6328. vaesenc xmm5, xmm5, xmm7
  6329. vaesenc xmm1, xmm1, xmm7
  6330. cmp r10d, 11
  6331. vmovdqa xmm7, OWORD PTR [r15+160]
  6332. jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
  6333. vaesenc xmm5, xmm5, xmm7
  6334. vaesenc xmm1, xmm1, xmm7
  6335. vmovdqa xmm7, OWORD PTR [r15+176]
  6336. vaesenc xmm5, xmm5, xmm7
  6337. vaesenc xmm1, xmm1, xmm7
  6338. cmp r10d, 13
  6339. vmovdqa xmm7, OWORD PTR [r15+192]
  6340. jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
  6341. vaesenc xmm5, xmm5, xmm7
  6342. vaesenc xmm1, xmm1, xmm7
  6343. vmovdqa xmm7, OWORD PTR [r15+208]
  6344. vaesenc xmm5, xmm5, xmm7
  6345. vaesenc xmm1, xmm1, xmm7
  6346. vmovdqa xmm7, OWORD PTR [r15+224]
  6347. L_AES_GCM_encrypt_avx1_calc_iv_12_last:
  6348. vaesenclast xmm5, xmm5, xmm7
  6349. vaesenclast xmm1, xmm1, xmm7
  6350. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6351. vmovdqu OWORD PTR [rsp+144], xmm1
  6352. jmp L_AES_GCM_encrypt_avx1_iv_done
  6353. L_AES_GCM_encrypt_avx1_iv_not_12:
  6354. ; Calculate values when IV is not 12 bytes
  6355. ; H = Encrypt X(=0)
  6356. vmovdqa xmm5, OWORD PTR [r15]
  6357. vaesenc xmm5, xmm5, [r15+16]
  6358. vaesenc xmm5, xmm5, [r15+32]
  6359. vaesenc xmm5, xmm5, [r15+48]
  6360. vaesenc xmm5, xmm5, [r15+64]
  6361. vaesenc xmm5, xmm5, [r15+80]
  6362. vaesenc xmm5, xmm5, [r15+96]
  6363. vaesenc xmm5, xmm5, [r15+112]
  6364. vaesenc xmm5, xmm5, [r15+128]
  6365. vaesenc xmm5, xmm5, [r15+144]
  6366. cmp r10d, 11
  6367. vmovdqa xmm9, OWORD PTR [r15+160]
  6368. jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
  6369. vaesenc xmm5, xmm5, xmm9
  6370. vaesenc xmm5, xmm5, [r15+176]
  6371. cmp r10d, 13
  6372. vmovdqa xmm9, OWORD PTR [r15+192]
  6373. jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
  6374. vaesenc xmm5, xmm5, xmm9
  6375. vaesenc xmm5, xmm5, [r15+208]
  6376. vmovdqa xmm9, OWORD PTR [r15+224]
  6377. L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last:
  6378. vaesenclast xmm5, xmm5, xmm9
  6379. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6380. ; Calc counter
  6381. ; Initialization vector
  6382. cmp edx, 0
  6383. mov rcx, 0
  6384. je L_AES_GCM_encrypt_avx1_calc_iv_done
  6385. cmp edx, 16
  6386. jl L_AES_GCM_encrypt_avx1_calc_iv_lt16
  6387. and edx, 4294967280
  6388. L_AES_GCM_encrypt_avx1_calc_iv_16_loop:
  6389. vmovdqu xmm8, OWORD PTR [rax+rcx]
  6390. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6391. vpxor xmm4, xmm4, xmm8
  6392. ; ghash_gfmul_avx
  6393. vpshufd xmm1, xmm4, 78
  6394. vpshufd xmm2, xmm5, 78
  6395. vpclmulqdq xmm3, xmm5, xmm4, 17
  6396. vpclmulqdq xmm0, xmm5, xmm4, 0
  6397. vpxor xmm1, xmm1, xmm4
  6398. vpxor xmm2, xmm2, xmm5
  6399. vpclmulqdq xmm1, xmm1, xmm2, 0
  6400. vpxor xmm1, xmm1, xmm0
  6401. vpxor xmm1, xmm1, xmm3
  6402. vmovdqa xmm7, xmm0
  6403. vmovdqa xmm4, xmm3
  6404. vpslldq xmm2, xmm1, 8
  6405. vpsrldq xmm1, xmm1, 8
  6406. vpxor xmm7, xmm7, xmm2
  6407. vpxor xmm4, xmm4, xmm1
  6408. vpsrld xmm0, xmm7, 31
  6409. vpsrld xmm1, xmm4, 31
  6410. vpslld xmm7, xmm7, 1
  6411. vpslld xmm4, xmm4, 1
  6412. vpsrldq xmm2, xmm0, 12
  6413. vpslldq xmm0, xmm0, 4
  6414. vpslldq xmm1, xmm1, 4
  6415. vpor xmm4, xmm4, xmm2
  6416. vpor xmm7, xmm7, xmm0
  6417. vpor xmm4, xmm4, xmm1
  6418. vpslld xmm0, xmm7, 31
  6419. vpslld xmm1, xmm7, 30
  6420. vpslld xmm2, xmm7, 25
  6421. vpxor xmm0, xmm0, xmm1
  6422. vpxor xmm0, xmm0, xmm2
  6423. vmovdqa xmm1, xmm0
  6424. vpsrldq xmm1, xmm1, 4
  6425. vpslldq xmm0, xmm0, 12
  6426. vpxor xmm7, xmm7, xmm0
  6427. vpsrld xmm2, xmm7, 1
  6428. vpsrld xmm3, xmm7, 2
  6429. vpsrld xmm0, xmm7, 7
  6430. vpxor xmm2, xmm2, xmm3
  6431. vpxor xmm2, xmm2, xmm0
  6432. vpxor xmm2, xmm2, xmm1
  6433. vpxor xmm2, xmm2, xmm7
  6434. vpxor xmm4, xmm4, xmm2
  6435. add ecx, 16
  6436. cmp ecx, edx
  6437. jl L_AES_GCM_encrypt_avx1_calc_iv_16_loop
  6438. mov edx, ebx
  6439. cmp ecx, edx
  6440. je L_AES_GCM_encrypt_avx1_calc_iv_done
  6441. L_AES_GCM_encrypt_avx1_calc_iv_lt16:
  6442. sub rsp, 16
  6443. vpxor xmm8, xmm8, xmm8
  6444. xor ebx, ebx
  6445. vmovdqu OWORD PTR [rsp], xmm8
  6446. L_AES_GCM_encrypt_avx1_calc_iv_loop:
  6447. movzx r13d, BYTE PTR [rax+rcx]
  6448. mov BYTE PTR [rsp+rbx], r13b
  6449. inc ecx
  6450. inc ebx
  6451. cmp ecx, edx
  6452. jl L_AES_GCM_encrypt_avx1_calc_iv_loop
  6453. vmovdqu xmm8, OWORD PTR [rsp]
  6454. add rsp, 16
  6455. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6456. vpxor xmm4, xmm4, xmm8
  6457. ; ghash_gfmul_avx
  6458. vpshufd xmm1, xmm4, 78
  6459. vpshufd xmm2, xmm5, 78
  6460. vpclmulqdq xmm3, xmm5, xmm4, 17
  6461. vpclmulqdq xmm0, xmm5, xmm4, 0
  6462. vpxor xmm1, xmm1, xmm4
  6463. vpxor xmm2, xmm2, xmm5
  6464. vpclmulqdq xmm1, xmm1, xmm2, 0
  6465. vpxor xmm1, xmm1, xmm0
  6466. vpxor xmm1, xmm1, xmm3
  6467. vmovdqa xmm7, xmm0
  6468. vmovdqa xmm4, xmm3
  6469. vpslldq xmm2, xmm1, 8
  6470. vpsrldq xmm1, xmm1, 8
  6471. vpxor xmm7, xmm7, xmm2
  6472. vpxor xmm4, xmm4, xmm1
  6473. vpsrld xmm0, xmm7, 31
  6474. vpsrld xmm1, xmm4, 31
  6475. vpslld xmm7, xmm7, 1
  6476. vpslld xmm4, xmm4, 1
  6477. vpsrldq xmm2, xmm0, 12
  6478. vpslldq xmm0, xmm0, 4
  6479. vpslldq xmm1, xmm1, 4
  6480. vpor xmm4, xmm4, xmm2
  6481. vpor xmm7, xmm7, xmm0
  6482. vpor xmm4, xmm4, xmm1
  6483. vpslld xmm0, xmm7, 31
  6484. vpslld xmm1, xmm7, 30
  6485. vpslld xmm2, xmm7, 25
  6486. vpxor xmm0, xmm0, xmm1
  6487. vpxor xmm0, xmm0, xmm2
  6488. vmovdqa xmm1, xmm0
  6489. vpsrldq xmm1, xmm1, 4
  6490. vpslldq xmm0, xmm0, 12
  6491. vpxor xmm7, xmm7, xmm0
  6492. vpsrld xmm2, xmm7, 1
  6493. vpsrld xmm3, xmm7, 2
  6494. vpsrld xmm0, xmm7, 7
  6495. vpxor xmm2, xmm2, xmm3
  6496. vpxor xmm2, xmm2, xmm0
  6497. vpxor xmm2, xmm2, xmm1
  6498. vpxor xmm2, xmm2, xmm7
  6499. vpxor xmm4, xmm4, xmm2
  6500. L_AES_GCM_encrypt_avx1_calc_iv_done:
  6501. ; T = Encrypt counter
  6502. vpxor xmm0, xmm0, xmm0
  6503. shl edx, 3
  6504. vmovq xmm0, rdx
  6505. vpxor xmm4, xmm4, xmm0
  6506. ; ghash_gfmul_avx
  6507. vpshufd xmm1, xmm4, 78
  6508. vpshufd xmm2, xmm5, 78
  6509. vpclmulqdq xmm3, xmm5, xmm4, 17
  6510. vpclmulqdq xmm0, xmm5, xmm4, 0
  6511. vpxor xmm1, xmm1, xmm4
  6512. vpxor xmm2, xmm2, xmm5
  6513. vpclmulqdq xmm1, xmm1, xmm2, 0
  6514. vpxor xmm1, xmm1, xmm0
  6515. vpxor xmm1, xmm1, xmm3
  6516. vmovdqa xmm7, xmm0
  6517. vmovdqa xmm4, xmm3
  6518. vpslldq xmm2, xmm1, 8
  6519. vpsrldq xmm1, xmm1, 8
  6520. vpxor xmm7, xmm7, xmm2
  6521. vpxor xmm4, xmm4, xmm1
  6522. vpsrld xmm0, xmm7, 31
  6523. vpsrld xmm1, xmm4, 31
  6524. vpslld xmm7, xmm7, 1
  6525. vpslld xmm4, xmm4, 1
  6526. vpsrldq xmm2, xmm0, 12
  6527. vpslldq xmm0, xmm0, 4
  6528. vpslldq xmm1, xmm1, 4
  6529. vpor xmm4, xmm4, xmm2
  6530. vpor xmm7, xmm7, xmm0
  6531. vpor xmm4, xmm4, xmm1
  6532. vpslld xmm0, xmm7, 31
  6533. vpslld xmm1, xmm7, 30
  6534. vpslld xmm2, xmm7, 25
  6535. vpxor xmm0, xmm0, xmm1
  6536. vpxor xmm0, xmm0, xmm2
  6537. vmovdqa xmm1, xmm0
  6538. vpsrldq xmm1, xmm1, 4
  6539. vpslldq xmm0, xmm0, 12
  6540. vpxor xmm7, xmm7, xmm0
  6541. vpsrld xmm2, xmm7, 1
  6542. vpsrld xmm3, xmm7, 2
  6543. vpsrld xmm0, xmm7, 7
  6544. vpxor xmm2, xmm2, xmm3
  6545. vpxor xmm2, xmm2, xmm0
  6546. vpxor xmm2, xmm2, xmm1
  6547. vpxor xmm2, xmm2, xmm7
  6548. vpxor xmm4, xmm4, xmm2
  6549. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6550. ; Encrypt counter
  6551. vmovdqa xmm8, OWORD PTR [r15]
  6552. vpxor xmm8, xmm8, xmm4
  6553. vaesenc xmm8, xmm8, [r15+16]
  6554. vaesenc xmm8, xmm8, [r15+32]
  6555. vaesenc xmm8, xmm8, [r15+48]
  6556. vaesenc xmm8, xmm8, [r15+64]
  6557. vaesenc xmm8, xmm8, [r15+80]
  6558. vaesenc xmm8, xmm8, [r15+96]
  6559. vaesenc xmm8, xmm8, [r15+112]
  6560. vaesenc xmm8, xmm8, [r15+128]
  6561. vaesenc xmm8, xmm8, [r15+144]
  6562. cmp r10d, 11
  6563. vmovdqa xmm9, OWORD PTR [r15+160]
  6564. jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
  6565. vaesenc xmm8, xmm8, xmm9
  6566. vaesenc xmm8, xmm8, [r15+176]
  6567. cmp r10d, 13
  6568. vmovdqa xmm9, OWORD PTR [r15+192]
  6569. jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
  6570. vaesenc xmm8, xmm8, xmm9
  6571. vaesenc xmm8, xmm8, [r15+208]
  6572. vmovdqa xmm9, OWORD PTR [r15+224]
  6573. L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last:
  6574. vaesenclast xmm8, xmm8, xmm9
  6575. vmovdqu OWORD PTR [rsp+144], xmm8
  6576. L_AES_GCM_encrypt_avx1_iv_done:
  6577. ; Additional authentication data
  6578. mov edx, r11d
  6579. cmp edx, 0
  6580. je L_AES_GCM_encrypt_avx1_calc_aad_done
  6581. xor ecx, ecx
  6582. cmp edx, 16
  6583. jl L_AES_GCM_encrypt_avx1_calc_aad_lt16
  6584. and edx, 4294967280
  6585. L_AES_GCM_encrypt_avx1_calc_aad_16_loop:
  6586. vmovdqu xmm8, OWORD PTR [r12+rcx]
  6587. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6588. vpxor xmm6, xmm6, xmm8
  6589. ; ghash_gfmul_avx
  6590. vpshufd xmm1, xmm6, 78
  6591. vpshufd xmm2, xmm5, 78
  6592. vpclmulqdq xmm3, xmm5, xmm6, 17
  6593. vpclmulqdq xmm0, xmm5, xmm6, 0
  6594. vpxor xmm1, xmm1, xmm6
  6595. vpxor xmm2, xmm2, xmm5
  6596. vpclmulqdq xmm1, xmm1, xmm2, 0
  6597. vpxor xmm1, xmm1, xmm0
  6598. vpxor xmm1, xmm1, xmm3
  6599. vmovdqa xmm7, xmm0
  6600. vmovdqa xmm6, xmm3
  6601. vpslldq xmm2, xmm1, 8
  6602. vpsrldq xmm1, xmm1, 8
  6603. vpxor xmm7, xmm7, xmm2
  6604. vpxor xmm6, xmm6, xmm1
  6605. vpsrld xmm0, xmm7, 31
  6606. vpsrld xmm1, xmm6, 31
  6607. vpslld xmm7, xmm7, 1
  6608. vpslld xmm6, xmm6, 1
  6609. vpsrldq xmm2, xmm0, 12
  6610. vpslldq xmm0, xmm0, 4
  6611. vpslldq xmm1, xmm1, 4
  6612. vpor xmm6, xmm6, xmm2
  6613. vpor xmm7, xmm7, xmm0
  6614. vpor xmm6, xmm6, xmm1
  6615. vpslld xmm0, xmm7, 31
  6616. vpslld xmm1, xmm7, 30
  6617. vpslld xmm2, xmm7, 25
  6618. vpxor xmm0, xmm0, xmm1
  6619. vpxor xmm0, xmm0, xmm2
  6620. vmovdqa xmm1, xmm0
  6621. vpsrldq xmm1, xmm1, 4
  6622. vpslldq xmm0, xmm0, 12
  6623. vpxor xmm7, xmm7, xmm0
  6624. vpsrld xmm2, xmm7, 1
  6625. vpsrld xmm3, xmm7, 2
  6626. vpsrld xmm0, xmm7, 7
  6627. vpxor xmm2, xmm2, xmm3
  6628. vpxor xmm2, xmm2, xmm0
  6629. vpxor xmm2, xmm2, xmm1
  6630. vpxor xmm2, xmm2, xmm7
  6631. vpxor xmm6, xmm6, xmm2
  6632. add ecx, 16
  6633. cmp ecx, edx
  6634. jl L_AES_GCM_encrypt_avx1_calc_aad_16_loop
  6635. mov edx, r11d
  6636. cmp ecx, edx
  6637. je L_AES_GCM_encrypt_avx1_calc_aad_done
  6638. L_AES_GCM_encrypt_avx1_calc_aad_lt16:
  6639. sub rsp, 16
  6640. vpxor xmm8, xmm8, xmm8
  6641. xor ebx, ebx
  6642. vmovdqu OWORD PTR [rsp], xmm8
  6643. L_AES_GCM_encrypt_avx1_calc_aad_loop:
  6644. movzx r13d, BYTE PTR [r12+rcx]
  6645. mov BYTE PTR [rsp+rbx], r13b
  6646. inc ecx
  6647. inc ebx
  6648. cmp ecx, edx
  6649. jl L_AES_GCM_encrypt_avx1_calc_aad_loop
  6650. vmovdqu xmm8, OWORD PTR [rsp]
  6651. add rsp, 16
  6652. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6653. vpxor xmm6, xmm6, xmm8
  6654. ; ghash_gfmul_avx
  6655. vpshufd xmm1, xmm6, 78
  6656. vpshufd xmm2, xmm5, 78
  6657. vpclmulqdq xmm3, xmm5, xmm6, 17
  6658. vpclmulqdq xmm0, xmm5, xmm6, 0
  6659. vpxor xmm1, xmm1, xmm6
  6660. vpxor xmm2, xmm2, xmm5
  6661. vpclmulqdq xmm1, xmm1, xmm2, 0
  6662. vpxor xmm1, xmm1, xmm0
  6663. vpxor xmm1, xmm1, xmm3
  6664. vmovdqa xmm7, xmm0
  6665. vmovdqa xmm6, xmm3
  6666. vpslldq xmm2, xmm1, 8
  6667. vpsrldq xmm1, xmm1, 8
  6668. vpxor xmm7, xmm7, xmm2
  6669. vpxor xmm6, xmm6, xmm1
  6670. vpsrld xmm0, xmm7, 31
  6671. vpsrld xmm1, xmm6, 31
  6672. vpslld xmm7, xmm7, 1
  6673. vpslld xmm6, xmm6, 1
  6674. vpsrldq xmm2, xmm0, 12
  6675. vpslldq xmm0, xmm0, 4
  6676. vpslldq xmm1, xmm1, 4
  6677. vpor xmm6, xmm6, xmm2
  6678. vpor xmm7, xmm7, xmm0
  6679. vpor xmm6, xmm6, xmm1
  6680. vpslld xmm0, xmm7, 31
  6681. vpslld xmm1, xmm7, 30
  6682. vpslld xmm2, xmm7, 25
  6683. vpxor xmm0, xmm0, xmm1
  6684. vpxor xmm0, xmm0, xmm2
  6685. vmovdqa xmm1, xmm0
  6686. vpsrldq xmm1, xmm1, 4
  6687. vpslldq xmm0, xmm0, 12
  6688. vpxor xmm7, xmm7, xmm0
  6689. vpsrld xmm2, xmm7, 1
  6690. vpsrld xmm3, xmm7, 2
  6691. vpsrld xmm0, xmm7, 7
  6692. vpxor xmm2, xmm2, xmm3
  6693. vpxor xmm2, xmm2, xmm0
  6694. vpxor xmm2, xmm2, xmm1
  6695. vpxor xmm2, xmm2, xmm7
  6696. vpxor xmm6, xmm6, xmm2
  6697. L_AES_GCM_encrypt_avx1_calc_aad_done:
  6698. ; Calculate counter and H
  6699. vpsrlq xmm9, xmm5, 63
  6700. vpsllq xmm8, xmm5, 1
  6701. vpslldq xmm9, xmm9, 8
  6702. vpor xmm8, xmm8, xmm9
  6703. vpshufd xmm5, xmm5, 255
  6704. vpsrad xmm5, xmm5, 31
  6705. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  6706. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  6707. vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
  6708. vpxor xmm5, xmm5, xmm8
  6709. vmovdqu OWORD PTR [rsp+128], xmm4
  6710. xor ebx, ebx
  6711. cmp r9d, 128
  6712. mov r13d, r9d
  6713. jl L_AES_GCM_encrypt_avx1_done_128
  6714. and r13d, 4294967168
  6715. vmovdqa xmm2, xmm6
  6716. ; H ^ 1
  6717. vmovdqu OWORD PTR [rsp], xmm5
  6718. ; H ^ 2
  6719. vpclmulqdq xmm8, xmm5, xmm5, 0
  6720. vpclmulqdq xmm0, xmm5, xmm5, 17
  6721. vpslld xmm12, xmm8, 31
  6722. vpslld xmm13, xmm8, 30
  6723. vpslld xmm14, xmm8, 25
  6724. vpxor xmm12, xmm12, xmm13
  6725. vpxor xmm12, xmm12, xmm14
  6726. vpsrldq xmm13, xmm12, 4
  6727. vpslldq xmm12, xmm12, 12
  6728. vpxor xmm8, xmm8, xmm12
  6729. vpsrld xmm14, xmm8, 1
  6730. vpsrld xmm10, xmm8, 2
  6731. vpsrld xmm9, xmm8, 7
  6732. vpxor xmm14, xmm14, xmm10
  6733. vpxor xmm14, xmm14, xmm9
  6734. vpxor xmm14, xmm14, xmm13
  6735. vpxor xmm14, xmm14, xmm8
  6736. vpxor xmm0, xmm0, xmm14
  6737. vmovdqu OWORD PTR [rsp+16], xmm0
  6738. ; H ^ 3
  6739. ; ghash_gfmul_red_avx
  6740. vpshufd xmm9, xmm5, 78
  6741. vpshufd xmm10, xmm0, 78
  6742. vpclmulqdq xmm11, xmm0, xmm5, 17
  6743. vpclmulqdq xmm8, xmm0, xmm5, 0
  6744. vpxor xmm9, xmm9, xmm5
  6745. vpxor xmm10, xmm10, xmm0
  6746. vpclmulqdq xmm9, xmm9, xmm10, 0
  6747. vpxor xmm9, xmm9, xmm8
  6748. vpxor xmm9, xmm9, xmm11
  6749. vpslldq xmm10, xmm9, 8
  6750. vpsrldq xmm9, xmm9, 8
  6751. vpxor xmm8, xmm8, xmm10
  6752. vpxor xmm1, xmm11, xmm9
  6753. vpslld xmm12, xmm8, 31
  6754. vpslld xmm13, xmm8, 30
  6755. vpslld xmm14, xmm8, 25
  6756. vpxor xmm12, xmm12, xmm13
  6757. vpxor xmm12, xmm12, xmm14
  6758. vpsrldq xmm13, xmm12, 4
  6759. vpslldq xmm12, xmm12, 12
  6760. vpxor xmm8, xmm8, xmm12
  6761. vpsrld xmm14, xmm8, 1
  6762. vpsrld xmm10, xmm8, 2
  6763. vpsrld xmm9, xmm8, 7
  6764. vpxor xmm14, xmm14, xmm10
  6765. vpxor xmm14, xmm14, xmm9
  6766. vpxor xmm14, xmm14, xmm13
  6767. vpxor xmm14, xmm14, xmm8
  6768. vpxor xmm1, xmm1, xmm14
  6769. vmovdqu OWORD PTR [rsp+32], xmm1
  6770. ; H ^ 4
  6771. vpclmulqdq xmm8, xmm0, xmm0, 0
  6772. vpclmulqdq xmm3, xmm0, xmm0, 17
  6773. vpslld xmm12, xmm8, 31
  6774. vpslld xmm13, xmm8, 30
  6775. vpslld xmm14, xmm8, 25
  6776. vpxor xmm12, xmm12, xmm13
  6777. vpxor xmm12, xmm12, xmm14
  6778. vpsrldq xmm13, xmm12, 4
  6779. vpslldq xmm12, xmm12, 12
  6780. vpxor xmm8, xmm8, xmm12
  6781. vpsrld xmm14, xmm8, 1
  6782. vpsrld xmm10, xmm8, 2
  6783. vpsrld xmm9, xmm8, 7
  6784. vpxor xmm14, xmm14, xmm10
  6785. vpxor xmm14, xmm14, xmm9
  6786. vpxor xmm14, xmm14, xmm13
  6787. vpxor xmm14, xmm14, xmm8
  6788. vpxor xmm3, xmm3, xmm14
  6789. vmovdqu OWORD PTR [rsp+48], xmm3
  6790. ; H ^ 5
  6791. ; ghash_gfmul_red_avx
  6792. vpshufd xmm9, xmm0, 78
  6793. vpshufd xmm10, xmm1, 78
  6794. vpclmulqdq xmm11, xmm1, xmm0, 17
  6795. vpclmulqdq xmm8, xmm1, xmm0, 0
  6796. vpxor xmm9, xmm9, xmm0
  6797. vpxor xmm10, xmm10, xmm1
  6798. vpclmulqdq xmm9, xmm9, xmm10, 0
  6799. vpxor xmm9, xmm9, xmm8
  6800. vpxor xmm9, xmm9, xmm11
  6801. vpslldq xmm10, xmm9, 8
  6802. vpsrldq xmm9, xmm9, 8
  6803. vpxor xmm8, xmm8, xmm10
  6804. vpxor xmm7, xmm11, xmm9
  6805. vpslld xmm12, xmm8, 31
  6806. vpslld xmm13, xmm8, 30
  6807. vpslld xmm14, xmm8, 25
  6808. vpxor xmm12, xmm12, xmm13
  6809. vpxor xmm12, xmm12, xmm14
  6810. vpsrldq xmm13, xmm12, 4
  6811. vpslldq xmm12, xmm12, 12
  6812. vpxor xmm8, xmm8, xmm12
  6813. vpsrld xmm14, xmm8, 1
  6814. vpsrld xmm10, xmm8, 2
  6815. vpsrld xmm9, xmm8, 7
  6816. vpxor xmm14, xmm14, xmm10
  6817. vpxor xmm14, xmm14, xmm9
  6818. vpxor xmm14, xmm14, xmm13
  6819. vpxor xmm14, xmm14, xmm8
  6820. vpxor xmm7, xmm7, xmm14
  6821. vmovdqu OWORD PTR [rsp+64], xmm7
  6822. ; H ^ 6
  6823. vpclmulqdq xmm8, xmm1, xmm1, 0
  6824. vpclmulqdq xmm7, xmm1, xmm1, 17
  6825. vpslld xmm12, xmm8, 31
  6826. vpslld xmm13, xmm8, 30
  6827. vpslld xmm14, xmm8, 25
  6828. vpxor xmm12, xmm12, xmm13
  6829. vpxor xmm12, xmm12, xmm14
  6830. vpsrldq xmm13, xmm12, 4
  6831. vpslldq xmm12, xmm12, 12
  6832. vpxor xmm8, xmm8, xmm12
  6833. vpsrld xmm14, xmm8, 1
  6834. vpsrld xmm10, xmm8, 2
  6835. vpsrld xmm9, xmm8, 7
  6836. vpxor xmm14, xmm14, xmm10
  6837. vpxor xmm14, xmm14, xmm9
  6838. vpxor xmm14, xmm14, xmm13
  6839. vpxor xmm14, xmm14, xmm8
  6840. vpxor xmm7, xmm7, xmm14
  6841. vmovdqu OWORD PTR [rsp+80], xmm7
  6842. ; H ^ 7
  6843. ; ghash_gfmul_red_avx
  6844. vpshufd xmm9, xmm1, 78
  6845. vpshufd xmm10, xmm3, 78
  6846. vpclmulqdq xmm11, xmm3, xmm1, 17
  6847. vpclmulqdq xmm8, xmm3, xmm1, 0
  6848. vpxor xmm9, xmm9, xmm1
  6849. vpxor xmm10, xmm10, xmm3
  6850. vpclmulqdq xmm9, xmm9, xmm10, 0
  6851. vpxor xmm9, xmm9, xmm8
  6852. vpxor xmm9, xmm9, xmm11
  6853. vpslldq xmm10, xmm9, 8
  6854. vpsrldq xmm9, xmm9, 8
  6855. vpxor xmm8, xmm8, xmm10
  6856. vpxor xmm7, xmm11, xmm9
  6857. vpslld xmm12, xmm8, 31
  6858. vpslld xmm13, xmm8, 30
  6859. vpslld xmm14, xmm8, 25
  6860. vpxor xmm12, xmm12, xmm13
  6861. vpxor xmm12, xmm12, xmm14
  6862. vpsrldq xmm13, xmm12, 4
  6863. vpslldq xmm12, xmm12, 12
  6864. vpxor xmm8, xmm8, xmm12
  6865. vpsrld xmm14, xmm8, 1
  6866. vpsrld xmm10, xmm8, 2
  6867. vpsrld xmm9, xmm8, 7
  6868. vpxor xmm14, xmm14, xmm10
  6869. vpxor xmm14, xmm14, xmm9
  6870. vpxor xmm14, xmm14, xmm13
  6871. vpxor xmm14, xmm14, xmm8
  6872. vpxor xmm7, xmm7, xmm14
  6873. vmovdqu OWORD PTR [rsp+96], xmm7
  6874. ; H ^ 8
  6875. vpclmulqdq xmm8, xmm3, xmm3, 0
  6876. vpclmulqdq xmm7, xmm3, xmm3, 17
  6877. vpslld xmm12, xmm8, 31
  6878. vpslld xmm13, xmm8, 30
  6879. vpslld xmm14, xmm8, 25
  6880. vpxor xmm12, xmm12, xmm13
  6881. vpxor xmm12, xmm12, xmm14
  6882. vpsrldq xmm13, xmm12, 4
  6883. vpslldq xmm12, xmm12, 12
  6884. vpxor xmm8, xmm8, xmm12
  6885. vpsrld xmm14, xmm8, 1
  6886. vpsrld xmm10, xmm8, 2
  6887. vpsrld xmm9, xmm8, 7
  6888. vpxor xmm14, xmm14, xmm10
  6889. vpxor xmm14, xmm14, xmm9
  6890. vpxor xmm14, xmm14, xmm13
  6891. vpxor xmm14, xmm14, xmm8
  6892. vpxor xmm7, xmm7, xmm14
  6893. vmovdqu OWORD PTR [rsp+112], xmm7
  6894. ; First 128 bytes of input
  6895. vmovdqu xmm0, OWORD PTR [rsp+128]
  6896. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  6897. vpshufb xmm8, xmm0, xmm1
  6898. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  6899. vpshufb xmm9, xmm9, xmm1
  6900. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  6901. vpshufb xmm10, xmm10, xmm1
  6902. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  6903. vpshufb xmm11, xmm11, xmm1
  6904. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  6905. vpshufb xmm12, xmm12, xmm1
  6906. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  6907. vpshufb xmm13, xmm13, xmm1
  6908. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  6909. vpshufb xmm14, xmm14, xmm1
  6910. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  6911. vpshufb xmm15, xmm15, xmm1
  6912. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  6913. vmovdqa xmm7, OWORD PTR [r15]
  6914. vmovdqu OWORD PTR [rsp+128], xmm0
  6915. vpxor xmm8, xmm8, xmm7
  6916. vpxor xmm9, xmm9, xmm7
  6917. vpxor xmm10, xmm10, xmm7
  6918. vpxor xmm11, xmm11, xmm7
  6919. vpxor xmm12, xmm12, xmm7
  6920. vpxor xmm13, xmm13, xmm7
  6921. vpxor xmm14, xmm14, xmm7
  6922. vpxor xmm15, xmm15, xmm7
  6923. vmovdqa xmm7, OWORD PTR [r15+16]
  6924. vaesenc xmm8, xmm8, xmm7
  6925. vaesenc xmm9, xmm9, xmm7
  6926. vaesenc xmm10, xmm10, xmm7
  6927. vaesenc xmm11, xmm11, xmm7
  6928. vaesenc xmm12, xmm12, xmm7
  6929. vaesenc xmm13, xmm13, xmm7
  6930. vaesenc xmm14, xmm14, xmm7
  6931. vaesenc xmm15, xmm15, xmm7
  6932. vmovdqa xmm7, OWORD PTR [r15+32]
  6933. vaesenc xmm8, xmm8, xmm7
  6934. vaesenc xmm9, xmm9, xmm7
  6935. vaesenc xmm10, xmm10, xmm7
  6936. vaesenc xmm11, xmm11, xmm7
  6937. vaesenc xmm12, xmm12, xmm7
  6938. vaesenc xmm13, xmm13, xmm7
  6939. vaesenc xmm14, xmm14, xmm7
  6940. vaesenc xmm15, xmm15, xmm7
  6941. vmovdqa xmm7, OWORD PTR [r15+48]
  6942. vaesenc xmm8, xmm8, xmm7
  6943. vaesenc xmm9, xmm9, xmm7
  6944. vaesenc xmm10, xmm10, xmm7
  6945. vaesenc xmm11, xmm11, xmm7
  6946. vaesenc xmm12, xmm12, xmm7
  6947. vaesenc xmm13, xmm13, xmm7
  6948. vaesenc xmm14, xmm14, xmm7
  6949. vaesenc xmm15, xmm15, xmm7
  6950. vmovdqa xmm7, OWORD PTR [r15+64]
  6951. vaesenc xmm8, xmm8, xmm7
  6952. vaesenc xmm9, xmm9, xmm7
  6953. vaesenc xmm10, xmm10, xmm7
  6954. vaesenc xmm11, xmm11, xmm7
  6955. vaesenc xmm12, xmm12, xmm7
  6956. vaesenc xmm13, xmm13, xmm7
  6957. vaesenc xmm14, xmm14, xmm7
  6958. vaesenc xmm15, xmm15, xmm7
  6959. vmovdqa xmm7, OWORD PTR [r15+80]
  6960. vaesenc xmm8, xmm8, xmm7
  6961. vaesenc xmm9, xmm9, xmm7
  6962. vaesenc xmm10, xmm10, xmm7
  6963. vaesenc xmm11, xmm11, xmm7
  6964. vaesenc xmm12, xmm12, xmm7
  6965. vaesenc xmm13, xmm13, xmm7
  6966. vaesenc xmm14, xmm14, xmm7
  6967. vaesenc xmm15, xmm15, xmm7
  6968. vmovdqa xmm7, OWORD PTR [r15+96]
  6969. vaesenc xmm8, xmm8, xmm7
  6970. vaesenc xmm9, xmm9, xmm7
  6971. vaesenc xmm10, xmm10, xmm7
  6972. vaesenc xmm11, xmm11, xmm7
  6973. vaesenc xmm12, xmm12, xmm7
  6974. vaesenc xmm13, xmm13, xmm7
  6975. vaesenc xmm14, xmm14, xmm7
  6976. vaesenc xmm15, xmm15, xmm7
  6977. vmovdqa xmm7, OWORD PTR [r15+112]
  6978. vaesenc xmm8, xmm8, xmm7
  6979. vaesenc xmm9, xmm9, xmm7
  6980. vaesenc xmm10, xmm10, xmm7
  6981. vaesenc xmm11, xmm11, xmm7
  6982. vaesenc xmm12, xmm12, xmm7
  6983. vaesenc xmm13, xmm13, xmm7
  6984. vaesenc xmm14, xmm14, xmm7
  6985. vaesenc xmm15, xmm15, xmm7
  6986. vmovdqa xmm7, OWORD PTR [r15+128]
  6987. vaesenc xmm8, xmm8, xmm7
  6988. vaesenc xmm9, xmm9, xmm7
  6989. vaesenc xmm10, xmm10, xmm7
  6990. vaesenc xmm11, xmm11, xmm7
  6991. vaesenc xmm12, xmm12, xmm7
  6992. vaesenc xmm13, xmm13, xmm7
  6993. vaesenc xmm14, xmm14, xmm7
  6994. vaesenc xmm15, xmm15, xmm7
  6995. vmovdqa xmm7, OWORD PTR [r15+144]
  6996. vaesenc xmm8, xmm8, xmm7
  6997. vaesenc xmm9, xmm9, xmm7
  6998. vaesenc xmm10, xmm10, xmm7
  6999. vaesenc xmm11, xmm11, xmm7
  7000. vaesenc xmm12, xmm12, xmm7
  7001. vaesenc xmm13, xmm13, xmm7
  7002. vaesenc xmm14, xmm14, xmm7
  7003. vaesenc xmm15, xmm15, xmm7
  7004. cmp r10d, 11
  7005. vmovdqa xmm7, OWORD PTR [r15+160]
  7006. jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
  7007. vaesenc xmm8, xmm8, xmm7
  7008. vaesenc xmm9, xmm9, xmm7
  7009. vaesenc xmm10, xmm10, xmm7
  7010. vaesenc xmm11, xmm11, xmm7
  7011. vaesenc xmm12, xmm12, xmm7
  7012. vaesenc xmm13, xmm13, xmm7
  7013. vaesenc xmm14, xmm14, xmm7
  7014. vaesenc xmm15, xmm15, xmm7
  7015. vmovdqa xmm7, OWORD PTR [r15+176]
  7016. vaesenc xmm8, xmm8, xmm7
  7017. vaesenc xmm9, xmm9, xmm7
  7018. vaesenc xmm10, xmm10, xmm7
  7019. vaesenc xmm11, xmm11, xmm7
  7020. vaesenc xmm12, xmm12, xmm7
  7021. vaesenc xmm13, xmm13, xmm7
  7022. vaesenc xmm14, xmm14, xmm7
  7023. vaesenc xmm15, xmm15, xmm7
  7024. cmp r10d, 13
  7025. vmovdqa xmm7, OWORD PTR [r15+192]
  7026. jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
  7027. vaesenc xmm8, xmm8, xmm7
  7028. vaesenc xmm9, xmm9, xmm7
  7029. vaesenc xmm10, xmm10, xmm7
  7030. vaesenc xmm11, xmm11, xmm7
  7031. vaesenc xmm12, xmm12, xmm7
  7032. vaesenc xmm13, xmm13, xmm7
  7033. vaesenc xmm14, xmm14, xmm7
  7034. vaesenc xmm15, xmm15, xmm7
  7035. vmovdqa xmm7, OWORD PTR [r15+208]
  7036. vaesenc xmm8, xmm8, xmm7
  7037. vaesenc xmm9, xmm9, xmm7
  7038. vaesenc xmm10, xmm10, xmm7
  7039. vaesenc xmm11, xmm11, xmm7
  7040. vaesenc xmm12, xmm12, xmm7
  7041. vaesenc xmm13, xmm13, xmm7
  7042. vaesenc xmm14, xmm14, xmm7
  7043. vaesenc xmm15, xmm15, xmm7
  7044. vmovdqa xmm7, OWORD PTR [r15+224]
  7045. L_AES_GCM_encrypt_avx1_aesenc_128_enc_done:
  7046. vaesenclast xmm8, xmm8, xmm7
  7047. vaesenclast xmm9, xmm9, xmm7
  7048. vmovdqu xmm0, OWORD PTR [rdi]
  7049. vmovdqu xmm1, OWORD PTR [rdi+16]
  7050. vpxor xmm8, xmm8, xmm0
  7051. vpxor xmm9, xmm9, xmm1
  7052. vmovdqu OWORD PTR [rsi], xmm8
  7053. vmovdqu OWORD PTR [rsi+16], xmm9
  7054. vaesenclast xmm10, xmm10, xmm7
  7055. vaesenclast xmm11, xmm11, xmm7
  7056. vmovdqu xmm0, OWORD PTR [rdi+32]
  7057. vmovdqu xmm1, OWORD PTR [rdi+48]
  7058. vpxor xmm10, xmm10, xmm0
  7059. vpxor xmm11, xmm11, xmm1
  7060. vmovdqu OWORD PTR [rsi+32], xmm10
  7061. vmovdqu OWORD PTR [rsi+48], xmm11
  7062. vaesenclast xmm12, xmm12, xmm7
  7063. vaesenclast xmm13, xmm13, xmm7
  7064. vmovdqu xmm0, OWORD PTR [rdi+64]
  7065. vmovdqu xmm1, OWORD PTR [rdi+80]
  7066. vpxor xmm12, xmm12, xmm0
  7067. vpxor xmm13, xmm13, xmm1
  7068. vmovdqu OWORD PTR [rsi+64], xmm12
  7069. vmovdqu OWORD PTR [rsi+80], xmm13
  7070. vaesenclast xmm14, xmm14, xmm7
  7071. vaesenclast xmm15, xmm15, xmm7
  7072. vmovdqu xmm0, OWORD PTR [rdi+96]
  7073. vmovdqu xmm1, OWORD PTR [rdi+112]
  7074. vpxor xmm14, xmm14, xmm0
  7075. vpxor xmm15, xmm15, xmm1
  7076. vmovdqu OWORD PTR [rsi+96], xmm14
  7077. vmovdqu OWORD PTR [rsi+112], xmm15
  7078. cmp r13d, 128
  7079. mov ebx, 128
  7080. jle L_AES_GCM_encrypt_avx1_end_128
  7081. ; Remaining full 128-byte blocks of input
  7082. L_AES_GCM_encrypt_avx1_ghash_128:
  7083. lea rcx, QWORD PTR [rdi+rbx]
  7084. lea rdx, QWORD PTR [rsi+rbx]
  7085. vmovdqu xmm0, OWORD PTR [rsp+128]
  7086. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  7087. vpshufb xmm8, xmm0, xmm1
  7088. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  7089. vpshufb xmm9, xmm9, xmm1
  7090. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  7091. vpshufb xmm10, xmm10, xmm1
  7092. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  7093. vpshufb xmm11, xmm11, xmm1
  7094. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  7095. vpshufb xmm12, xmm12, xmm1
  7096. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  7097. vpshufb xmm13, xmm13, xmm1
  7098. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  7099. vpshufb xmm14, xmm14, xmm1
  7100. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  7101. vpshufb xmm15, xmm15, xmm1
  7102. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  7103. vmovdqa xmm7, OWORD PTR [r15]
  7104. vmovdqu OWORD PTR [rsp+128], xmm0
  7105. vpxor xmm8, xmm8, xmm7
  7106. vpxor xmm9, xmm9, xmm7
  7107. vpxor xmm10, xmm10, xmm7
  7108. vpxor xmm11, xmm11, xmm7
  7109. vpxor xmm12, xmm12, xmm7
  7110. vpxor xmm13, xmm13, xmm7
  7111. vpxor xmm14, xmm14, xmm7
  7112. vpxor xmm15, xmm15, xmm7
  7113. vmovdqu xmm7, OWORD PTR [rsp+112]
  7114. vmovdqu xmm0, OWORD PTR [rdx+-128]
  7115. vaesenc xmm8, xmm8, [r15+16]
  7116. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7117. vpxor xmm0, xmm0, xmm2
  7118. vpshufd xmm1, xmm7, 78
  7119. vpshufd xmm5, xmm0, 78
  7120. vpxor xmm1, xmm1, xmm7
  7121. vpxor xmm5, xmm5, xmm0
  7122. vpclmulqdq xmm3, xmm0, xmm7, 17
  7123. vaesenc xmm9, xmm9, [r15+16]
  7124. vaesenc xmm10, xmm10, [r15+16]
  7125. vpclmulqdq xmm2, xmm0, xmm7, 0
  7126. vaesenc xmm11, xmm11, [r15+16]
  7127. vaesenc xmm12, xmm12, [r15+16]
  7128. vpclmulqdq xmm1, xmm1, xmm5, 0
  7129. vaesenc xmm13, xmm13, [r15+16]
  7130. vaesenc xmm14, xmm14, [r15+16]
  7131. vaesenc xmm15, xmm15, [r15+16]
  7132. vpxor xmm1, xmm1, xmm2
  7133. vpxor xmm1, xmm1, xmm3
  7134. vmovdqu xmm7, OWORD PTR [rsp+96]
  7135. vmovdqu xmm0, OWORD PTR [rdx+-112]
  7136. vpshufd xmm4, xmm7, 78
  7137. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7138. vaesenc xmm8, xmm8, [r15+32]
  7139. vpxor xmm4, xmm4, xmm7
  7140. vpshufd xmm5, xmm0, 78
  7141. vpxor xmm5, xmm5, xmm0
  7142. vpclmulqdq xmm6, xmm0, xmm7, 17
  7143. vaesenc xmm9, xmm9, [r15+32]
  7144. vaesenc xmm10, xmm10, [r15+32]
  7145. vpclmulqdq xmm7, xmm0, xmm7, 0
  7146. vaesenc xmm11, xmm11, [r15+32]
  7147. vaesenc xmm12, xmm12, [r15+32]
  7148. vpclmulqdq xmm4, xmm4, xmm5, 0
  7149. vaesenc xmm13, xmm13, [r15+32]
  7150. vaesenc xmm14, xmm14, [r15+32]
  7151. vaesenc xmm15, xmm15, [r15+32]
  7152. vpxor xmm1, xmm1, xmm7
  7153. vpxor xmm2, xmm2, xmm7
  7154. vpxor xmm1, xmm1, xmm6
  7155. vpxor xmm3, xmm3, xmm6
  7156. vpxor xmm1, xmm1, xmm4
  7157. vmovdqu xmm7, OWORD PTR [rsp+80]
  7158. vmovdqu xmm0, OWORD PTR [rdx+-96]
  7159. vpshufd xmm4, xmm7, 78
  7160. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7161. vaesenc xmm8, xmm8, [r15+48]
  7162. vpxor xmm4, xmm4, xmm7
  7163. vpshufd xmm5, xmm0, 78
  7164. vpxor xmm5, xmm5, xmm0
  7165. vpclmulqdq xmm6, xmm0, xmm7, 17
  7166. vaesenc xmm9, xmm9, [r15+48]
  7167. vaesenc xmm10, xmm10, [r15+48]
  7168. vpclmulqdq xmm7, xmm0, xmm7, 0
  7169. vaesenc xmm11, xmm11, [r15+48]
  7170. vaesenc xmm12, xmm12, [r15+48]
  7171. vpclmulqdq xmm4, xmm4, xmm5, 0
  7172. vaesenc xmm13, xmm13, [r15+48]
  7173. vaesenc xmm14, xmm14, [r15+48]
  7174. vaesenc xmm15, xmm15, [r15+48]
  7175. vpxor xmm1, xmm1, xmm7
  7176. vpxor xmm2, xmm2, xmm7
  7177. vpxor xmm1, xmm1, xmm6
  7178. vpxor xmm3, xmm3, xmm6
  7179. vpxor xmm1, xmm1, xmm4
  7180. vmovdqu xmm7, OWORD PTR [rsp+64]
  7181. vmovdqu xmm0, OWORD PTR [rdx+-80]
  7182. vpshufd xmm4, xmm7, 78
  7183. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7184. vaesenc xmm8, xmm8, [r15+64]
  7185. vpxor xmm4, xmm4, xmm7
  7186. vpshufd xmm5, xmm0, 78
  7187. vpxor xmm5, xmm5, xmm0
  7188. vpclmulqdq xmm6, xmm0, xmm7, 17
  7189. vaesenc xmm9, xmm9, [r15+64]
  7190. vaesenc xmm10, xmm10, [r15+64]
  7191. vpclmulqdq xmm7, xmm0, xmm7, 0
  7192. vaesenc xmm11, xmm11, [r15+64]
  7193. vaesenc xmm12, xmm12, [r15+64]
  7194. vpclmulqdq xmm4, xmm4, xmm5, 0
  7195. vaesenc xmm13, xmm13, [r15+64]
  7196. vaesenc xmm14, xmm14, [r15+64]
  7197. vaesenc xmm15, xmm15, [r15+64]
  7198. vpxor xmm1, xmm1, xmm7
  7199. vpxor xmm2, xmm2, xmm7
  7200. vpxor xmm1, xmm1, xmm6
  7201. vpxor xmm3, xmm3, xmm6
  7202. vpxor xmm1, xmm1, xmm4
  7203. vmovdqu xmm7, OWORD PTR [rsp+48]
  7204. vmovdqu xmm0, OWORD PTR [rdx+-64]
  7205. vpshufd xmm4, xmm7, 78
  7206. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7207. vaesenc xmm8, xmm8, [r15+80]
  7208. vpxor xmm4, xmm4, xmm7
  7209. vpshufd xmm5, xmm0, 78
  7210. vpxor xmm5, xmm5, xmm0
  7211. vpclmulqdq xmm6, xmm0, xmm7, 17
  7212. vaesenc xmm9, xmm9, [r15+80]
  7213. vaesenc xmm10, xmm10, [r15+80]
  7214. vpclmulqdq xmm7, xmm0, xmm7, 0
  7215. vaesenc xmm11, xmm11, [r15+80]
  7216. vaesenc xmm12, xmm12, [r15+80]
  7217. vpclmulqdq xmm4, xmm4, xmm5, 0
  7218. vaesenc xmm13, xmm13, [r15+80]
  7219. vaesenc xmm14, xmm14, [r15+80]
  7220. vaesenc xmm15, xmm15, [r15+80]
  7221. vpxor xmm1, xmm1, xmm7
  7222. vpxor xmm2, xmm2, xmm7
  7223. vpxor xmm1, xmm1, xmm6
  7224. vpxor xmm3, xmm3, xmm6
  7225. vpxor xmm1, xmm1, xmm4
  7226. vmovdqu xmm7, OWORD PTR [rsp+32]
  7227. vmovdqu xmm0, OWORD PTR [rdx+-48]
  7228. vpshufd xmm4, xmm7, 78
  7229. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7230. vaesenc xmm8, xmm8, [r15+96]
  7231. vpxor xmm4, xmm4, xmm7
  7232. vpshufd xmm5, xmm0, 78
  7233. vpxor xmm5, xmm5, xmm0
  7234. vpclmulqdq xmm6, xmm0, xmm7, 17
  7235. vaesenc xmm9, xmm9, [r15+96]
  7236. vaesenc xmm10, xmm10, [r15+96]
  7237. vpclmulqdq xmm7, xmm0, xmm7, 0
  7238. vaesenc xmm11, xmm11, [r15+96]
  7239. vaesenc xmm12, xmm12, [r15+96]
  7240. vpclmulqdq xmm4, xmm4, xmm5, 0
  7241. vaesenc xmm13, xmm13, [r15+96]
  7242. vaesenc xmm14, xmm14, [r15+96]
  7243. vaesenc xmm15, xmm15, [r15+96]
  7244. vpxor xmm1, xmm1, xmm7
  7245. vpxor xmm2, xmm2, xmm7
  7246. vpxor xmm1, xmm1, xmm6
  7247. vpxor xmm3, xmm3, xmm6
  7248. vpxor xmm1, xmm1, xmm4
  7249. vmovdqu xmm7, OWORD PTR [rsp+16]
  7250. vmovdqu xmm0, OWORD PTR [rdx+-32]
  7251. vpshufd xmm4, xmm7, 78
  7252. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7253. vaesenc xmm8, xmm8, [r15+112]
  7254. vpxor xmm4, xmm4, xmm7
  7255. vpshufd xmm5, xmm0, 78
  7256. vpxor xmm5, xmm5, xmm0
  7257. vpclmulqdq xmm6, xmm0, xmm7, 17
  7258. vaesenc xmm9, xmm9, [r15+112]
  7259. vaesenc xmm10, xmm10, [r15+112]
  7260. vpclmulqdq xmm7, xmm0, xmm7, 0
  7261. vaesenc xmm11, xmm11, [r15+112]
  7262. vaesenc xmm12, xmm12, [r15+112]
  7263. vpclmulqdq xmm4, xmm4, xmm5, 0
  7264. vaesenc xmm13, xmm13, [r15+112]
  7265. vaesenc xmm14, xmm14, [r15+112]
  7266. vaesenc xmm15, xmm15, [r15+112]
  7267. vpxor xmm1, xmm1, xmm7
  7268. vpxor xmm2, xmm2, xmm7
  7269. vpxor xmm1, xmm1, xmm6
  7270. vpxor xmm3, xmm3, xmm6
  7271. vpxor xmm1, xmm1, xmm4
  7272. vmovdqu xmm7, OWORD PTR [rsp]
  7273. vmovdqu xmm0, OWORD PTR [rdx+-16]
  7274. vpshufd xmm4, xmm7, 78
  7275. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7276. vaesenc xmm8, xmm8, [r15+128]
  7277. vpxor xmm4, xmm4, xmm7
  7278. vpshufd xmm5, xmm0, 78
  7279. vpxor xmm5, xmm5, xmm0
  7280. vpclmulqdq xmm6, xmm0, xmm7, 17
  7281. vaesenc xmm9, xmm9, [r15+128]
  7282. vaesenc xmm10, xmm10, [r15+128]
  7283. vpclmulqdq xmm7, xmm0, xmm7, 0
  7284. vaesenc xmm11, xmm11, [r15+128]
  7285. vaesenc xmm12, xmm12, [r15+128]
  7286. vpclmulqdq xmm4, xmm4, xmm5, 0
  7287. vaesenc xmm13, xmm13, [r15+128]
  7288. vaesenc xmm14, xmm14, [r15+128]
  7289. vaesenc xmm15, xmm15, [r15+128]
  7290. vpxor xmm1, xmm1, xmm7
  7291. vpxor xmm2, xmm2, xmm7
  7292. vpxor xmm1, xmm1, xmm6
  7293. vpxor xmm3, xmm3, xmm6
  7294. vpxor xmm1, xmm1, xmm4
  7295. vpslldq xmm5, xmm1, 8
  7296. vpsrldq xmm1, xmm1, 8
  7297. vaesenc xmm8, xmm8, [r15+144]
  7298. vpxor xmm2, xmm2, xmm5
  7299. vpxor xmm3, xmm3, xmm1
  7300. vaesenc xmm9, xmm9, [r15+144]
  7301. vpslld xmm7, xmm2, 31
  7302. vpslld xmm4, xmm2, 30
  7303. vpslld xmm5, xmm2, 25
  7304. vaesenc xmm10, xmm10, [r15+144]
  7305. vpxor xmm7, xmm7, xmm4
  7306. vpxor xmm7, xmm7, xmm5
  7307. vaesenc xmm11, xmm11, [r15+144]
  7308. vpsrldq xmm4, xmm7, 4
  7309. vpslldq xmm7, xmm7, 12
  7310. vaesenc xmm12, xmm12, [r15+144]
  7311. vpxor xmm2, xmm2, xmm7
  7312. vpsrld xmm5, xmm2, 1
  7313. vaesenc xmm13, xmm13, [r15+144]
  7314. vpsrld xmm1, xmm2, 2
  7315. vpsrld xmm0, xmm2, 7
  7316. vaesenc xmm14, xmm14, [r15+144]
  7317. vpxor xmm5, xmm5, xmm1
  7318. vpxor xmm5, xmm5, xmm0
  7319. vaesenc xmm15, xmm15, [r15+144]
  7320. vpxor xmm5, xmm5, xmm4
  7321. vpxor xmm2, xmm2, xmm5
  7322. vpxor xmm2, xmm2, xmm3
  7323. cmp r10d, 11
  7324. vmovdqa xmm7, OWORD PTR [r15+160]
  7325. jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
  7326. vaesenc xmm8, xmm8, xmm7
  7327. vaesenc xmm9, xmm9, xmm7
  7328. vaesenc xmm10, xmm10, xmm7
  7329. vaesenc xmm11, xmm11, xmm7
  7330. vaesenc xmm12, xmm12, xmm7
  7331. vaesenc xmm13, xmm13, xmm7
  7332. vaesenc xmm14, xmm14, xmm7
  7333. vaesenc xmm15, xmm15, xmm7
  7334. vmovdqa xmm7, OWORD PTR [r15+176]
  7335. vaesenc xmm8, xmm8, xmm7
  7336. vaesenc xmm9, xmm9, xmm7
  7337. vaesenc xmm10, xmm10, xmm7
  7338. vaesenc xmm11, xmm11, xmm7
  7339. vaesenc xmm12, xmm12, xmm7
  7340. vaesenc xmm13, xmm13, xmm7
  7341. vaesenc xmm14, xmm14, xmm7
  7342. vaesenc xmm15, xmm15, xmm7
  7343. cmp r10d, 13
  7344. vmovdqa xmm7, OWORD PTR [r15+192]
  7345. jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
  7346. vaesenc xmm8, xmm8, xmm7
  7347. vaesenc xmm9, xmm9, xmm7
  7348. vaesenc xmm10, xmm10, xmm7
  7349. vaesenc xmm11, xmm11, xmm7
  7350. vaesenc xmm12, xmm12, xmm7
  7351. vaesenc xmm13, xmm13, xmm7
  7352. vaesenc xmm14, xmm14, xmm7
  7353. vaesenc xmm15, xmm15, xmm7
  7354. vmovdqa xmm7, OWORD PTR [r15+208]
  7355. vaesenc xmm8, xmm8, xmm7
  7356. vaesenc xmm9, xmm9, xmm7
  7357. vaesenc xmm10, xmm10, xmm7
  7358. vaesenc xmm11, xmm11, xmm7
  7359. vaesenc xmm12, xmm12, xmm7
  7360. vaesenc xmm13, xmm13, xmm7
  7361. vaesenc xmm14, xmm14, xmm7
  7362. vaesenc xmm15, xmm15, xmm7
  7363. vmovdqa xmm7, OWORD PTR [r15+224]
  7364. L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done:
  7365. vaesenclast xmm8, xmm8, xmm7
  7366. vaesenclast xmm9, xmm9, xmm7
  7367. vmovdqu xmm0, OWORD PTR [rcx]
  7368. vmovdqu xmm1, OWORD PTR [rcx+16]
  7369. vpxor xmm8, xmm8, xmm0
  7370. vpxor xmm9, xmm9, xmm1
  7371. vmovdqu OWORD PTR [rdx], xmm8
  7372. vmovdqu OWORD PTR [rdx+16], xmm9
  7373. vaesenclast xmm10, xmm10, xmm7
  7374. vaesenclast xmm11, xmm11, xmm7
  7375. vmovdqu xmm0, OWORD PTR [rcx+32]
  7376. vmovdqu xmm1, OWORD PTR [rcx+48]
  7377. vpxor xmm10, xmm10, xmm0
  7378. vpxor xmm11, xmm11, xmm1
  7379. vmovdqu OWORD PTR [rdx+32], xmm10
  7380. vmovdqu OWORD PTR [rdx+48], xmm11
  7381. vaesenclast xmm12, xmm12, xmm7
  7382. vaesenclast xmm13, xmm13, xmm7
  7383. vmovdqu xmm0, OWORD PTR [rcx+64]
  7384. vmovdqu xmm1, OWORD PTR [rcx+80]
  7385. vpxor xmm12, xmm12, xmm0
  7386. vpxor xmm13, xmm13, xmm1
  7387. vmovdqu OWORD PTR [rdx+64], xmm12
  7388. vmovdqu OWORD PTR [rdx+80], xmm13
  7389. vaesenclast xmm14, xmm14, xmm7
  7390. vaesenclast xmm15, xmm15, xmm7
  7391. vmovdqu xmm0, OWORD PTR [rcx+96]
  7392. vmovdqu xmm1, OWORD PTR [rcx+112]
  7393. vpxor xmm14, xmm14, xmm0
  7394. vpxor xmm15, xmm15, xmm1
  7395. vmovdqu OWORD PTR [rdx+96], xmm14
  7396. vmovdqu OWORD PTR [rdx+112], xmm15
  7397. add ebx, 128
  7398. cmp ebx, r13d
  7399. jl L_AES_GCM_encrypt_avx1_ghash_128
  7400. L_AES_GCM_encrypt_avx1_end_128:
  7401. vmovdqa xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7402. vpshufb xmm8, xmm8, xmm4
  7403. vpshufb xmm9, xmm9, xmm4
  7404. vpshufb xmm10, xmm10, xmm4
  7405. vpshufb xmm11, xmm11, xmm4
  7406. vpxor xmm8, xmm8, xmm2
  7407. vpshufb xmm12, xmm12, xmm4
  7408. vpshufb xmm13, xmm13, xmm4
  7409. vpshufb xmm14, xmm14, xmm4
  7410. vpshufb xmm15, xmm15, xmm4
  7411. vmovdqu xmm7, OWORD PTR [rsp]
  7412. vmovdqu xmm5, OWORD PTR [rsp+16]
  7413. ; ghash_gfmul_avx
  7414. vpshufd xmm1, xmm15, 78
  7415. vpshufd xmm2, xmm7, 78
  7416. vpclmulqdq xmm3, xmm7, xmm15, 17
  7417. vpclmulqdq xmm0, xmm7, xmm15, 0
  7418. vpxor xmm1, xmm1, xmm15
  7419. vpxor xmm2, xmm2, xmm7
  7420. vpclmulqdq xmm1, xmm1, xmm2, 0
  7421. vpxor xmm1, xmm1, xmm0
  7422. vpxor xmm1, xmm1, xmm3
  7423. vmovdqa xmm4, xmm0
  7424. vmovdqa xmm6, xmm3
  7425. vpslldq xmm2, xmm1, 8
  7426. vpsrldq xmm1, xmm1, 8
  7427. vpxor xmm4, xmm4, xmm2
  7428. vpxor xmm6, xmm6, xmm1
  7429. ; ghash_gfmul_xor_avx
  7430. vpshufd xmm1, xmm14, 78
  7431. vpshufd xmm2, xmm5, 78
  7432. vpclmulqdq xmm3, xmm5, xmm14, 17
  7433. vpclmulqdq xmm0, xmm5, xmm14, 0
  7434. vpxor xmm1, xmm1, xmm14
  7435. vpxor xmm2, xmm2, xmm5
  7436. vpclmulqdq xmm1, xmm1, xmm2, 0
  7437. vpxor xmm1, xmm1, xmm0
  7438. vpxor xmm1, xmm1, xmm3
  7439. vpxor xmm4, xmm4, xmm0
  7440. vpxor xmm6, xmm6, xmm3
  7441. vpslldq xmm2, xmm1, 8
  7442. vpsrldq xmm1, xmm1, 8
  7443. vpxor xmm4, xmm4, xmm2
  7444. vpxor xmm6, xmm6, xmm1
  7445. vmovdqu xmm7, OWORD PTR [rsp+32]
  7446. vmovdqu xmm5, OWORD PTR [rsp+48]
  7447. ; ghash_gfmul_xor_avx
  7448. vpshufd xmm1, xmm13, 78
  7449. vpshufd xmm2, xmm7, 78
  7450. vpclmulqdq xmm3, xmm7, xmm13, 17
  7451. vpclmulqdq xmm0, xmm7, xmm13, 0
  7452. vpxor xmm1, xmm1, xmm13
  7453. vpxor xmm2, xmm2, xmm7
  7454. vpclmulqdq xmm1, xmm1, xmm2, 0
  7455. vpxor xmm1, xmm1, xmm0
  7456. vpxor xmm1, xmm1, xmm3
  7457. vpxor xmm4, xmm4, xmm0
  7458. vpxor xmm6, xmm6, xmm3
  7459. vpslldq xmm2, xmm1, 8
  7460. vpsrldq xmm1, xmm1, 8
  7461. vpxor xmm4, xmm4, xmm2
  7462. vpxor xmm6, xmm6, xmm1
  7463. ; ghash_gfmul_xor_avx
  7464. vpshufd xmm1, xmm12, 78
  7465. vpshufd xmm2, xmm5, 78
  7466. vpclmulqdq xmm3, xmm5, xmm12, 17
  7467. vpclmulqdq xmm0, xmm5, xmm12, 0
  7468. vpxor xmm1, xmm1, xmm12
  7469. vpxor xmm2, xmm2, xmm5
  7470. vpclmulqdq xmm1, xmm1, xmm2, 0
  7471. vpxor xmm1, xmm1, xmm0
  7472. vpxor xmm1, xmm1, xmm3
  7473. vpxor xmm4, xmm4, xmm0
  7474. vpxor xmm6, xmm6, xmm3
  7475. vpslldq xmm2, xmm1, 8
  7476. vpsrldq xmm1, xmm1, 8
  7477. vpxor xmm4, xmm4, xmm2
  7478. vpxor xmm6, xmm6, xmm1
  7479. vmovdqu xmm7, OWORD PTR [rsp+64]
  7480. vmovdqu xmm5, OWORD PTR [rsp+80]
  7481. ; ghash_gfmul_xor_avx
  7482. vpshufd xmm1, xmm11, 78
  7483. vpshufd xmm2, xmm7, 78
  7484. vpclmulqdq xmm3, xmm7, xmm11, 17
  7485. vpclmulqdq xmm0, xmm7, xmm11, 0
  7486. vpxor xmm1, xmm1, xmm11
  7487. vpxor xmm2, xmm2, xmm7
  7488. vpclmulqdq xmm1, xmm1, xmm2, 0
  7489. vpxor xmm1, xmm1, xmm0
  7490. vpxor xmm1, xmm1, xmm3
  7491. vpxor xmm4, xmm4, xmm0
  7492. vpxor xmm6, xmm6, xmm3
  7493. vpslldq xmm2, xmm1, 8
  7494. vpsrldq xmm1, xmm1, 8
  7495. vpxor xmm4, xmm4, xmm2
  7496. vpxor xmm6, xmm6, xmm1
  7497. ; ghash_gfmul_xor_avx
  7498. vpshufd xmm1, xmm10, 78
  7499. vpshufd xmm2, xmm5, 78
  7500. vpclmulqdq xmm3, xmm5, xmm10, 17
  7501. vpclmulqdq xmm0, xmm5, xmm10, 0
  7502. vpxor xmm1, xmm1, xmm10
  7503. vpxor xmm2, xmm2, xmm5
  7504. vpclmulqdq xmm1, xmm1, xmm2, 0
  7505. vpxor xmm1, xmm1, xmm0
  7506. vpxor xmm1, xmm1, xmm3
  7507. vpxor xmm4, xmm4, xmm0
  7508. vpxor xmm6, xmm6, xmm3
  7509. vpslldq xmm2, xmm1, 8
  7510. vpsrldq xmm1, xmm1, 8
  7511. vpxor xmm4, xmm4, xmm2
  7512. vpxor xmm6, xmm6, xmm1
  7513. vmovdqu xmm7, OWORD PTR [rsp+96]
  7514. vmovdqu xmm5, OWORD PTR [rsp+112]
  7515. ; ghash_gfmul_xor_avx
  7516. vpshufd xmm1, xmm9, 78
  7517. vpshufd xmm2, xmm7, 78
  7518. vpclmulqdq xmm3, xmm7, xmm9, 17
  7519. vpclmulqdq xmm0, xmm7, xmm9, 0
  7520. vpxor xmm1, xmm1, xmm9
  7521. vpxor xmm2, xmm2, xmm7
  7522. vpclmulqdq xmm1, xmm1, xmm2, 0
  7523. vpxor xmm1, xmm1, xmm0
  7524. vpxor xmm1, xmm1, xmm3
  7525. vpxor xmm4, xmm4, xmm0
  7526. vpxor xmm6, xmm6, xmm3
  7527. vpslldq xmm2, xmm1, 8
  7528. vpsrldq xmm1, xmm1, 8
  7529. vpxor xmm4, xmm4, xmm2
  7530. vpxor xmm6, xmm6, xmm1
  7531. ; ghash_gfmul_xor_avx
  7532. vpshufd xmm1, xmm8, 78
  7533. vpshufd xmm2, xmm5, 78
  7534. vpclmulqdq xmm3, xmm5, xmm8, 17
  7535. vpclmulqdq xmm0, xmm5, xmm8, 0
  7536. vpxor xmm1, xmm1, xmm8
  7537. vpxor xmm2, xmm2, xmm5
  7538. vpclmulqdq xmm1, xmm1, xmm2, 0
  7539. vpxor xmm1, xmm1, xmm0
  7540. vpxor xmm1, xmm1, xmm3
  7541. vpxor xmm4, xmm4, xmm0
  7542. vpxor xmm6, xmm6, xmm3
  7543. vpslldq xmm2, xmm1, 8
  7544. vpsrldq xmm1, xmm1, 8
  7545. vpxor xmm4, xmm4, xmm2
  7546. vpxor xmm6, xmm6, xmm1
  7547. vpslld xmm0, xmm4, 31
  7548. vpslld xmm1, xmm4, 30
  7549. vpslld xmm2, xmm4, 25
  7550. vpxor xmm0, xmm0, xmm1
  7551. vpxor xmm0, xmm0, xmm2
  7552. vmovdqa xmm1, xmm0
  7553. vpsrldq xmm1, xmm1, 4
  7554. vpslldq xmm0, xmm0, 12
  7555. vpxor xmm4, xmm4, xmm0
  7556. vpsrld xmm2, xmm4, 1
  7557. vpsrld xmm3, xmm4, 2
  7558. vpsrld xmm0, xmm4, 7
  7559. vpxor xmm2, xmm2, xmm3
  7560. vpxor xmm2, xmm2, xmm0
  7561. vpxor xmm2, xmm2, xmm1
  7562. vpxor xmm2, xmm2, xmm4
  7563. vpxor xmm6, xmm6, xmm2
  7564. vmovdqu xmm5, OWORD PTR [rsp]
  7565. L_AES_GCM_encrypt_avx1_done_128:
  7566. mov edx, r9d
  7567. cmp ebx, edx
  7568. jge L_AES_GCM_encrypt_avx1_done_enc
  7569. mov r13d, r9d
  7570. and r13d, 4294967280
  7571. cmp ebx, r13d
  7572. jge L_AES_GCM_encrypt_avx1_last_block_done
  7573. vmovdqu xmm9, OWORD PTR [rsp+128]
  7574. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  7575. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  7576. vmovdqu OWORD PTR [rsp+128], xmm9
  7577. vpxor xmm8, xmm8, [r15]
  7578. vaesenc xmm8, xmm8, [r15+16]
  7579. vaesenc xmm8, xmm8, [r15+32]
  7580. vaesenc xmm8, xmm8, [r15+48]
  7581. vaesenc xmm8, xmm8, [r15+64]
  7582. vaesenc xmm8, xmm8, [r15+80]
  7583. vaesenc xmm8, xmm8, [r15+96]
  7584. vaesenc xmm8, xmm8, [r15+112]
  7585. vaesenc xmm8, xmm8, [r15+128]
  7586. vaesenc xmm8, xmm8, [r15+144]
  7587. cmp r10d, 11
  7588. vmovdqa xmm9, OWORD PTR [r15+160]
  7589. jl L_AES_GCM_encrypt_avx1_aesenc_block_last
  7590. vaesenc xmm8, xmm8, xmm9
  7591. vaesenc xmm8, xmm8, [r15+176]
  7592. cmp r10d, 13
  7593. vmovdqa xmm9, OWORD PTR [r15+192]
  7594. jl L_AES_GCM_encrypt_avx1_aesenc_block_last
  7595. vaesenc xmm8, xmm8, xmm9
  7596. vaesenc xmm8, xmm8, [r15+208]
  7597. vmovdqa xmm9, OWORD PTR [r15+224]
  7598. L_AES_GCM_encrypt_avx1_aesenc_block_last:
  7599. vaesenclast xmm8, xmm8, xmm9
  7600. vmovdqu xmm9, OWORD PTR [rdi+rbx]
  7601. vpxor xmm8, xmm8, xmm9
  7602. vmovdqu OWORD PTR [rsi+rbx], xmm8
  7603. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7604. vpxor xmm6, xmm6, xmm8
  7605. add ebx, 16
  7606. cmp ebx, r13d
  7607. jge L_AES_GCM_encrypt_avx1_last_block_ghash
  7608. L_AES_GCM_encrypt_avx1_last_block_start:
  7609. vmovdqu xmm13, OWORD PTR [rdi+rbx]
  7610. vmovdqu xmm9, OWORD PTR [rsp+128]
  7611. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  7612. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  7613. vmovdqu OWORD PTR [rsp+128], xmm9
  7614. vpxor xmm8, xmm8, [r15]
  7615. vpclmulqdq xmm10, xmm6, xmm5, 16
  7616. vaesenc xmm8, xmm8, [r15+16]
  7617. vaesenc xmm8, xmm8, [r15+32]
  7618. vpclmulqdq xmm11, xmm6, xmm5, 1
  7619. vaesenc xmm8, xmm8, [r15+48]
  7620. vaesenc xmm8, xmm8, [r15+64]
  7621. vpclmulqdq xmm12, xmm6, xmm5, 0
  7622. vaesenc xmm8, xmm8, [r15+80]
  7623. vpclmulqdq xmm1, xmm6, xmm5, 17
  7624. vaesenc xmm8, xmm8, [r15+96]
  7625. vpxor xmm10, xmm10, xmm11
  7626. vpslldq xmm2, xmm10, 8
  7627. vpsrldq xmm10, xmm10, 8
  7628. vaesenc xmm8, xmm8, [r15+112]
  7629. vpxor xmm2, xmm2, xmm12
  7630. vpxor xmm3, xmm1, xmm10
  7631. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  7632. vpclmulqdq xmm11, xmm2, xmm0, 16
  7633. vaesenc xmm8, xmm8, [r15+128]
  7634. vpshufd xmm10, xmm2, 78
  7635. vpxor xmm10, xmm10, xmm11
  7636. vpclmulqdq xmm11, xmm10, xmm0, 16
  7637. vaesenc xmm8, xmm8, [r15+144]
  7638. vpshufd xmm10, xmm10, 78
  7639. vpxor xmm10, xmm10, xmm11
  7640. vpxor xmm6, xmm10, xmm3
  7641. cmp r10d, 11
  7642. vmovdqa xmm9, OWORD PTR [r15+160]
  7643. jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
  7644. vaesenc xmm8, xmm8, xmm9
  7645. vaesenc xmm8, xmm8, [r15+176]
  7646. cmp r10d, 13
  7647. vmovdqa xmm9, OWORD PTR [r15+192]
  7648. jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
  7649. vaesenc xmm8, xmm8, xmm9
  7650. vaesenc xmm8, xmm8, [r15+208]
  7651. vmovdqa xmm9, OWORD PTR [r15+224]
  7652. L_AES_GCM_encrypt_avx1_aesenc_gfmul_last:
  7653. vaesenclast xmm8, xmm8, xmm9
  7654. vmovdqa xmm0, xmm13
  7655. vpxor xmm8, xmm8, xmm0
  7656. vmovdqu OWORD PTR [rsi+rbx], xmm8
  7657. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7658. add ebx, 16
  7659. vpxor xmm6, xmm6, xmm8
  7660. cmp ebx, r13d
  7661. jl L_AES_GCM_encrypt_avx1_last_block_start
  7662. L_AES_GCM_encrypt_avx1_last_block_ghash:
  7663. ; ghash_gfmul_red_avx
  7664. vpshufd xmm9, xmm5, 78
  7665. vpshufd xmm10, xmm6, 78
  7666. vpclmulqdq xmm11, xmm6, xmm5, 17
  7667. vpclmulqdq xmm8, xmm6, xmm5, 0
  7668. vpxor xmm9, xmm9, xmm5
  7669. vpxor xmm10, xmm10, xmm6
  7670. vpclmulqdq xmm9, xmm9, xmm10, 0
  7671. vpxor xmm9, xmm9, xmm8
  7672. vpxor xmm9, xmm9, xmm11
  7673. vpslldq xmm10, xmm9, 8
  7674. vpsrldq xmm9, xmm9, 8
  7675. vpxor xmm8, xmm8, xmm10
  7676. vpxor xmm6, xmm11, xmm9
  7677. vpslld xmm12, xmm8, 31
  7678. vpslld xmm13, xmm8, 30
  7679. vpslld xmm14, xmm8, 25
  7680. vpxor xmm12, xmm12, xmm13
  7681. vpxor xmm12, xmm12, xmm14
  7682. vpsrldq xmm13, xmm12, 4
  7683. vpslldq xmm12, xmm12, 12
  7684. vpxor xmm8, xmm8, xmm12
  7685. vpsrld xmm14, xmm8, 1
  7686. vpsrld xmm10, xmm8, 2
  7687. vpsrld xmm9, xmm8, 7
  7688. vpxor xmm14, xmm14, xmm10
  7689. vpxor xmm14, xmm14, xmm9
  7690. vpxor xmm14, xmm14, xmm13
  7691. vpxor xmm14, xmm14, xmm8
  7692. vpxor xmm6, xmm6, xmm14
  7693. L_AES_GCM_encrypt_avx1_last_block_done:
  7694. mov ecx, r9d
  7695. mov edx, ecx
  7696. and ecx, 15
  7697. jz L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done
  7698. vmovdqu xmm4, OWORD PTR [rsp+128]
  7699. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  7700. vpxor xmm4, xmm4, [r15]
  7701. vaesenc xmm4, xmm4, [r15+16]
  7702. vaesenc xmm4, xmm4, [r15+32]
  7703. vaesenc xmm4, xmm4, [r15+48]
  7704. vaesenc xmm4, xmm4, [r15+64]
  7705. vaesenc xmm4, xmm4, [r15+80]
  7706. vaesenc xmm4, xmm4, [r15+96]
  7707. vaesenc xmm4, xmm4, [r15+112]
  7708. vaesenc xmm4, xmm4, [r15+128]
  7709. vaesenc xmm4, xmm4, [r15+144]
  7710. cmp r10d, 11
  7711. vmovdqa xmm9, OWORD PTR [r15+160]
  7712. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
  7713. vaesenc xmm4, xmm4, xmm9
  7714. vaesenc xmm4, xmm4, [r15+176]
  7715. cmp r10d, 13
  7716. vmovdqa xmm9, OWORD PTR [r15+192]
  7717. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
  7718. vaesenc xmm4, xmm4, xmm9
  7719. vaesenc xmm4, xmm4, [r15+208]
  7720. vmovdqa xmm9, OWORD PTR [r15+224]
  7721. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last:
  7722. vaesenclast xmm4, xmm4, xmm9
  7723. sub rsp, 16
  7724. xor ecx, ecx
  7725. vmovdqu OWORD PTR [rsp], xmm4
  7726. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop:
  7727. movzx r13d, BYTE PTR [rdi+rbx]
  7728. xor r13b, BYTE PTR [rsp+rcx]
  7729. mov BYTE PTR [rsi+rbx], r13b
  7730. mov BYTE PTR [rsp+rcx], r13b
  7731. inc ebx
  7732. inc ecx
  7733. cmp ebx, edx
  7734. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop
  7735. xor r13, r13
  7736. cmp ecx, 16
  7737. je L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc
  7738. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop:
  7739. mov BYTE PTR [rsp+rcx], r13b
  7740. inc ecx
  7741. cmp ecx, 16
  7742. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop
  7743. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc:
  7744. vmovdqu xmm4, OWORD PTR [rsp]
  7745. add rsp, 16
  7746. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7747. vpxor xmm6, xmm6, xmm4
  7748. ; ghash_gfmul_red_avx
  7749. vpshufd xmm9, xmm5, 78
  7750. vpshufd xmm10, xmm6, 78
  7751. vpclmulqdq xmm11, xmm6, xmm5, 17
  7752. vpclmulqdq xmm8, xmm6, xmm5, 0
  7753. vpxor xmm9, xmm9, xmm5
  7754. vpxor xmm10, xmm10, xmm6
  7755. vpclmulqdq xmm9, xmm9, xmm10, 0
  7756. vpxor xmm9, xmm9, xmm8
  7757. vpxor xmm9, xmm9, xmm11
  7758. vpslldq xmm10, xmm9, 8
  7759. vpsrldq xmm9, xmm9, 8
  7760. vpxor xmm8, xmm8, xmm10
  7761. vpxor xmm6, xmm11, xmm9
  7762. vpslld xmm12, xmm8, 31
  7763. vpslld xmm13, xmm8, 30
  7764. vpslld xmm14, xmm8, 25
  7765. vpxor xmm12, xmm12, xmm13
  7766. vpxor xmm12, xmm12, xmm14
  7767. vpsrldq xmm13, xmm12, 4
  7768. vpslldq xmm12, xmm12, 12
  7769. vpxor xmm8, xmm8, xmm12
  7770. vpsrld xmm14, xmm8, 1
  7771. vpsrld xmm10, xmm8, 2
  7772. vpsrld xmm9, xmm8, 7
  7773. vpxor xmm14, xmm14, xmm10
  7774. vpxor xmm14, xmm14, xmm9
  7775. vpxor xmm14, xmm14, xmm13
  7776. vpxor xmm14, xmm14, xmm8
  7777. vpxor xmm6, xmm6, xmm14
  7778. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done:
  7779. L_AES_GCM_encrypt_avx1_done_enc:
  7780. mov edx, r9d
  7781. mov ecx, r11d
  7782. shl rdx, 3
  7783. shl rcx, 3
  7784. vmovq xmm0, rdx
  7785. vmovq xmm1, rcx
  7786. vpunpcklqdq xmm0, xmm0, xmm1
  7787. vpxor xmm6, xmm6, xmm0
  7788. ; ghash_gfmul_red_avx
  7789. vpshufd xmm9, xmm5, 78
  7790. vpshufd xmm10, xmm6, 78
  7791. vpclmulqdq xmm11, xmm6, xmm5, 17
  7792. vpclmulqdq xmm8, xmm6, xmm5, 0
  7793. vpxor xmm9, xmm9, xmm5
  7794. vpxor xmm10, xmm10, xmm6
  7795. vpclmulqdq xmm9, xmm9, xmm10, 0
  7796. vpxor xmm9, xmm9, xmm8
  7797. vpxor xmm9, xmm9, xmm11
  7798. vpslldq xmm10, xmm9, 8
  7799. vpsrldq xmm9, xmm9, 8
  7800. vpxor xmm8, xmm8, xmm10
  7801. vpxor xmm6, xmm11, xmm9
  7802. vpslld xmm12, xmm8, 31
  7803. vpslld xmm13, xmm8, 30
  7804. vpslld xmm14, xmm8, 25
  7805. vpxor xmm12, xmm12, xmm13
  7806. vpxor xmm12, xmm12, xmm14
  7807. vpsrldq xmm13, xmm12, 4
  7808. vpslldq xmm12, xmm12, 12
  7809. vpxor xmm8, xmm8, xmm12
  7810. vpsrld xmm14, xmm8, 1
  7811. vpsrld xmm10, xmm8, 2
  7812. vpsrld xmm9, xmm8, 7
  7813. vpxor xmm14, xmm14, xmm10
  7814. vpxor xmm14, xmm14, xmm9
  7815. vpxor xmm14, xmm14, xmm13
  7816. vpxor xmm14, xmm14, xmm8
  7817. vpxor xmm6, xmm6, xmm14
  7818. vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7819. vmovdqu xmm0, OWORD PTR [rsp+144]
  7820. vpxor xmm0, xmm0, xmm6
  7821. cmp r14d, 16
  7822. je L_AES_GCM_encrypt_avx1_store_tag_16
  7823. xor rcx, rcx
  7824. vmovdqu OWORD PTR [rsp], xmm0
  7825. L_AES_GCM_encrypt_avx1_store_tag_loop:
  7826. movzx r13d, BYTE PTR [rsp+rcx]
  7827. mov BYTE PTR [r8+rcx], r13b
  7828. inc ecx
  7829. cmp ecx, r14d
  7830. jne L_AES_GCM_encrypt_avx1_store_tag_loop
  7831. jmp L_AES_GCM_encrypt_avx1_store_tag_done
  7832. L_AES_GCM_encrypt_avx1_store_tag_16:
  7833. vmovdqu OWORD PTR [r8], xmm0
  7834. L_AES_GCM_encrypt_avx1_store_tag_done:
  7835. vzeroupper
  7836. vmovdqu xmm6, OWORD PTR [rsp+160]
  7837. vmovdqu xmm7, OWORD PTR [rsp+176]
  7838. vmovdqu xmm8, OWORD PTR [rsp+192]
  7839. vmovdqu xmm9, OWORD PTR [rsp+208]
  7840. vmovdqu xmm10, OWORD PTR [rsp+224]
  7841. vmovdqu xmm11, OWORD PTR [rsp+240]
  7842. vmovdqu xmm12, OWORD PTR [rsp+256]
  7843. vmovdqu xmm13, OWORD PTR [rsp+272]
  7844. vmovdqu xmm14, OWORD PTR [rsp+288]
  7845. vmovdqu xmm15, OWORD PTR [rsp+304]
  7846. add rsp, 320
  7847. pop r15
  7848. pop r14
  7849. pop rbx
  7850. pop r12
  7851. pop rsi
  7852. pop rdi
  7853. pop r13
  7854. ret
  7855. AES_GCM_encrypt_avx1 ENDP
  7856. _text ENDS
  7857. _text SEGMENT READONLY PARA
  7858. AES_GCM_decrypt_avx1 PROC
  7859. push r13
  7860. push rdi
  7861. push rsi
  7862. push r12
  7863. push rbx
  7864. push r14
  7865. push r15
  7866. push rbp
  7867. mov rdi, rcx
  7868. mov rsi, rdx
  7869. mov r12, r8
  7870. mov rax, r9
  7871. mov r8, QWORD PTR [rsp+104]
  7872. mov r9d, DWORD PTR [rsp+112]
  7873. mov r11d, DWORD PTR [rsp+120]
  7874. mov ebx, DWORD PTR [rsp+128]
  7875. mov r14d, DWORD PTR [rsp+136]
  7876. mov r15, QWORD PTR [rsp+144]
  7877. mov r10d, DWORD PTR [rsp+152]
  7878. mov rbp, QWORD PTR [rsp+160]
  7879. sub rsp, 328
  7880. vmovdqu OWORD PTR [rsp+168], xmm6
  7881. vmovdqu OWORD PTR [rsp+184], xmm7
  7882. vmovdqu OWORD PTR [rsp+200], xmm8
  7883. vmovdqu OWORD PTR [rsp+216], xmm9
  7884. vmovdqu OWORD PTR [rsp+232], xmm10
  7885. vmovdqu OWORD PTR [rsp+248], xmm11
  7886. vmovdqu OWORD PTR [rsp+264], xmm12
  7887. vmovdqu OWORD PTR [rsp+280], xmm13
  7888. vmovdqu OWORD PTR [rsp+296], xmm14
  7889. vmovdqu OWORD PTR [rsp+312], xmm15
  7890. vpxor xmm4, xmm4, xmm4
  7891. vpxor xmm6, xmm6, xmm6
  7892. cmp ebx, 12
  7893. mov edx, ebx
  7894. jne L_AES_GCM_decrypt_avx1_iv_not_12
  7895. ; # Calculate values when IV is 12 bytes
  7896. ; Set counter based on IV
  7897. mov ecx, 16777216
  7898. vmovq xmm4, QWORD PTR [rax]
  7899. vpinsrd xmm4, xmm4, DWORD PTR [rax+8], 2
  7900. vpinsrd xmm4, xmm4, ecx, 3
  7901. ; H = Encrypt X(=0) and T = Encrypt counter
  7902. vmovdqa xmm5, OWORD PTR [r15]
  7903. vpxor xmm1, xmm4, xmm5
  7904. vmovdqa xmm7, OWORD PTR [r15+16]
  7905. vaesenc xmm5, xmm5, xmm7
  7906. vaesenc xmm1, xmm1, xmm7
  7907. vmovdqa xmm7, OWORD PTR [r15+32]
  7908. vaesenc xmm5, xmm5, xmm7
  7909. vaesenc xmm1, xmm1, xmm7
  7910. vmovdqa xmm7, OWORD PTR [r15+48]
  7911. vaesenc xmm5, xmm5, xmm7
  7912. vaesenc xmm1, xmm1, xmm7
  7913. vmovdqa xmm7, OWORD PTR [r15+64]
  7914. vaesenc xmm5, xmm5, xmm7
  7915. vaesenc xmm1, xmm1, xmm7
  7916. vmovdqa xmm7, OWORD PTR [r15+80]
  7917. vaesenc xmm5, xmm5, xmm7
  7918. vaesenc xmm1, xmm1, xmm7
  7919. vmovdqa xmm7, OWORD PTR [r15+96]
  7920. vaesenc xmm5, xmm5, xmm7
  7921. vaesenc xmm1, xmm1, xmm7
  7922. vmovdqa xmm7, OWORD PTR [r15+112]
  7923. vaesenc xmm5, xmm5, xmm7
  7924. vaesenc xmm1, xmm1, xmm7
  7925. vmovdqa xmm7, OWORD PTR [r15+128]
  7926. vaesenc xmm5, xmm5, xmm7
  7927. vaesenc xmm1, xmm1, xmm7
  7928. vmovdqa xmm7, OWORD PTR [r15+144]
  7929. vaesenc xmm5, xmm5, xmm7
  7930. vaesenc xmm1, xmm1, xmm7
  7931. cmp r10d, 11
  7932. vmovdqa xmm7, OWORD PTR [r15+160]
  7933. jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
  7934. vaesenc xmm5, xmm5, xmm7
  7935. vaesenc xmm1, xmm1, xmm7
  7936. vmovdqa xmm7, OWORD PTR [r15+176]
  7937. vaesenc xmm5, xmm5, xmm7
  7938. vaesenc xmm1, xmm1, xmm7
  7939. cmp r10d, 13
  7940. vmovdqa xmm7, OWORD PTR [r15+192]
  7941. jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
  7942. vaesenc xmm5, xmm5, xmm7
  7943. vaesenc xmm1, xmm1, xmm7
  7944. vmovdqa xmm7, OWORD PTR [r15+208]
  7945. vaesenc xmm5, xmm5, xmm7
  7946. vaesenc xmm1, xmm1, xmm7
  7947. vmovdqa xmm7, OWORD PTR [r15+224]
  7948. L_AES_GCM_decrypt_avx1_calc_iv_12_last:
  7949. vaesenclast xmm5, xmm5, xmm7
  7950. vaesenclast xmm1, xmm1, xmm7
  7951. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7952. vmovdqu OWORD PTR [rsp+144], xmm1
  7953. jmp L_AES_GCM_decrypt_avx1_iv_done
  7954. L_AES_GCM_decrypt_avx1_iv_not_12:
  7955. ; Calculate values when IV is not 12 bytes
  7956. ; H = Encrypt X(=0)
  7957. vmovdqa xmm5, OWORD PTR [r15]
  7958. vaesenc xmm5, xmm5, [r15+16]
  7959. vaesenc xmm5, xmm5, [r15+32]
  7960. vaesenc xmm5, xmm5, [r15+48]
  7961. vaesenc xmm5, xmm5, [r15+64]
  7962. vaesenc xmm5, xmm5, [r15+80]
  7963. vaesenc xmm5, xmm5, [r15+96]
  7964. vaesenc xmm5, xmm5, [r15+112]
  7965. vaesenc xmm5, xmm5, [r15+128]
  7966. vaesenc xmm5, xmm5, [r15+144]
  7967. cmp r10d, 11
  7968. vmovdqa xmm9, OWORD PTR [r15+160]
  7969. jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
  7970. vaesenc xmm5, xmm5, xmm9
  7971. vaesenc xmm5, xmm5, [r15+176]
  7972. cmp r10d, 13
  7973. vmovdqa xmm9, OWORD PTR [r15+192]
  7974. jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
  7975. vaesenc xmm5, xmm5, xmm9
  7976. vaesenc xmm5, xmm5, [r15+208]
  7977. vmovdqa xmm9, OWORD PTR [r15+224]
  7978. L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last:
  7979. vaesenclast xmm5, xmm5, xmm9
  7980. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7981. ; Calc counter
  7982. ; Initialization vector
  7983. cmp edx, 0
  7984. mov rcx, 0
  7985. je L_AES_GCM_decrypt_avx1_calc_iv_done
  7986. cmp edx, 16
  7987. jl L_AES_GCM_decrypt_avx1_calc_iv_lt16
  7988. and edx, 4294967280
  7989. L_AES_GCM_decrypt_avx1_calc_iv_16_loop:
  7990. vmovdqu xmm8, OWORD PTR [rax+rcx]
  7991. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7992. vpxor xmm4, xmm4, xmm8
  7993. ; ghash_gfmul_avx
  7994. vpshufd xmm1, xmm4, 78
  7995. vpshufd xmm2, xmm5, 78
  7996. vpclmulqdq xmm3, xmm5, xmm4, 17
  7997. vpclmulqdq xmm0, xmm5, xmm4, 0
  7998. vpxor xmm1, xmm1, xmm4
  7999. vpxor xmm2, xmm2, xmm5
  8000. vpclmulqdq xmm1, xmm1, xmm2, 0
  8001. vpxor xmm1, xmm1, xmm0
  8002. vpxor xmm1, xmm1, xmm3
  8003. vmovdqa xmm7, xmm0
  8004. vmovdqa xmm4, xmm3
  8005. vpslldq xmm2, xmm1, 8
  8006. vpsrldq xmm1, xmm1, 8
  8007. vpxor xmm7, xmm7, xmm2
  8008. vpxor xmm4, xmm4, xmm1
  8009. vpsrld xmm0, xmm7, 31
  8010. vpsrld xmm1, xmm4, 31
  8011. vpslld xmm7, xmm7, 1
  8012. vpslld xmm4, xmm4, 1
  8013. vpsrldq xmm2, xmm0, 12
  8014. vpslldq xmm0, xmm0, 4
  8015. vpslldq xmm1, xmm1, 4
  8016. vpor xmm4, xmm4, xmm2
  8017. vpor xmm7, xmm7, xmm0
  8018. vpor xmm4, xmm4, xmm1
  8019. vpslld xmm0, xmm7, 31
  8020. vpslld xmm1, xmm7, 30
  8021. vpslld xmm2, xmm7, 25
  8022. vpxor xmm0, xmm0, xmm1
  8023. vpxor xmm0, xmm0, xmm2
  8024. vmovdqa xmm1, xmm0
  8025. vpsrldq xmm1, xmm1, 4
  8026. vpslldq xmm0, xmm0, 12
  8027. vpxor xmm7, xmm7, xmm0
  8028. vpsrld xmm2, xmm7, 1
  8029. vpsrld xmm3, xmm7, 2
  8030. vpsrld xmm0, xmm7, 7
  8031. vpxor xmm2, xmm2, xmm3
  8032. vpxor xmm2, xmm2, xmm0
  8033. vpxor xmm2, xmm2, xmm1
  8034. vpxor xmm2, xmm2, xmm7
  8035. vpxor xmm4, xmm4, xmm2
  8036. add ecx, 16
  8037. cmp ecx, edx
  8038. jl L_AES_GCM_decrypt_avx1_calc_iv_16_loop
  8039. mov edx, ebx
  8040. cmp ecx, edx
  8041. je L_AES_GCM_decrypt_avx1_calc_iv_done
  8042. L_AES_GCM_decrypt_avx1_calc_iv_lt16:
  8043. sub rsp, 16
  8044. vpxor xmm8, xmm8, xmm8
  8045. xor ebx, ebx
  8046. vmovdqu OWORD PTR [rsp], xmm8
  8047. L_AES_GCM_decrypt_avx1_calc_iv_loop:
  8048. movzx r13d, BYTE PTR [rax+rcx]
  8049. mov BYTE PTR [rsp+rbx], r13b
  8050. inc ecx
  8051. inc ebx
  8052. cmp ecx, edx
  8053. jl L_AES_GCM_decrypt_avx1_calc_iv_loop
  8054. vmovdqu xmm8, OWORD PTR [rsp]
  8055. add rsp, 16
  8056. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8057. vpxor xmm4, xmm4, xmm8
  8058. ; ghash_gfmul_avx
  8059. vpshufd xmm1, xmm4, 78
  8060. vpshufd xmm2, xmm5, 78
  8061. vpclmulqdq xmm3, xmm5, xmm4, 17
  8062. vpclmulqdq xmm0, xmm5, xmm4, 0
  8063. vpxor xmm1, xmm1, xmm4
  8064. vpxor xmm2, xmm2, xmm5
  8065. vpclmulqdq xmm1, xmm1, xmm2, 0
  8066. vpxor xmm1, xmm1, xmm0
  8067. vpxor xmm1, xmm1, xmm3
  8068. vmovdqa xmm7, xmm0
  8069. vmovdqa xmm4, xmm3
  8070. vpslldq xmm2, xmm1, 8
  8071. vpsrldq xmm1, xmm1, 8
  8072. vpxor xmm7, xmm7, xmm2
  8073. vpxor xmm4, xmm4, xmm1
  8074. vpsrld xmm0, xmm7, 31
  8075. vpsrld xmm1, xmm4, 31
  8076. vpslld xmm7, xmm7, 1
  8077. vpslld xmm4, xmm4, 1
  8078. vpsrldq xmm2, xmm0, 12
  8079. vpslldq xmm0, xmm0, 4
  8080. vpslldq xmm1, xmm1, 4
  8081. vpor xmm4, xmm4, xmm2
  8082. vpor xmm7, xmm7, xmm0
  8083. vpor xmm4, xmm4, xmm1
  8084. vpslld xmm0, xmm7, 31
  8085. vpslld xmm1, xmm7, 30
  8086. vpslld xmm2, xmm7, 25
  8087. vpxor xmm0, xmm0, xmm1
  8088. vpxor xmm0, xmm0, xmm2
  8089. vmovdqa xmm1, xmm0
  8090. vpsrldq xmm1, xmm1, 4
  8091. vpslldq xmm0, xmm0, 12
  8092. vpxor xmm7, xmm7, xmm0
  8093. vpsrld xmm2, xmm7, 1
  8094. vpsrld xmm3, xmm7, 2
  8095. vpsrld xmm0, xmm7, 7
  8096. vpxor xmm2, xmm2, xmm3
  8097. vpxor xmm2, xmm2, xmm0
  8098. vpxor xmm2, xmm2, xmm1
  8099. vpxor xmm2, xmm2, xmm7
  8100. vpxor xmm4, xmm4, xmm2
  8101. L_AES_GCM_decrypt_avx1_calc_iv_done:
  8102. ; T = Encrypt counter
  8103. vpxor xmm0, xmm0, xmm0
  8104. shl edx, 3
  8105. vmovq xmm0, rdx
  8106. vpxor xmm4, xmm4, xmm0
  8107. ; ghash_gfmul_avx
  8108. vpshufd xmm1, xmm4, 78
  8109. vpshufd xmm2, xmm5, 78
  8110. vpclmulqdq xmm3, xmm5, xmm4, 17
  8111. vpclmulqdq xmm0, xmm5, xmm4, 0
  8112. vpxor xmm1, xmm1, xmm4
  8113. vpxor xmm2, xmm2, xmm5
  8114. vpclmulqdq xmm1, xmm1, xmm2, 0
  8115. vpxor xmm1, xmm1, xmm0
  8116. vpxor xmm1, xmm1, xmm3
  8117. vmovdqa xmm7, xmm0
  8118. vmovdqa xmm4, xmm3
  8119. vpslldq xmm2, xmm1, 8
  8120. vpsrldq xmm1, xmm1, 8
  8121. vpxor xmm7, xmm7, xmm2
  8122. vpxor xmm4, xmm4, xmm1
  8123. vpsrld xmm0, xmm7, 31
  8124. vpsrld xmm1, xmm4, 31
  8125. vpslld xmm7, xmm7, 1
  8126. vpslld xmm4, xmm4, 1
  8127. vpsrldq xmm2, xmm0, 12
  8128. vpslldq xmm0, xmm0, 4
  8129. vpslldq xmm1, xmm1, 4
  8130. vpor xmm4, xmm4, xmm2
  8131. vpor xmm7, xmm7, xmm0
  8132. vpor xmm4, xmm4, xmm1
  8133. vpslld xmm0, xmm7, 31
  8134. vpslld xmm1, xmm7, 30
  8135. vpslld xmm2, xmm7, 25
  8136. vpxor xmm0, xmm0, xmm1
  8137. vpxor xmm0, xmm0, xmm2
  8138. vmovdqa xmm1, xmm0
  8139. vpsrldq xmm1, xmm1, 4
  8140. vpslldq xmm0, xmm0, 12
  8141. vpxor xmm7, xmm7, xmm0
  8142. vpsrld xmm2, xmm7, 1
  8143. vpsrld xmm3, xmm7, 2
  8144. vpsrld xmm0, xmm7, 7
  8145. vpxor xmm2, xmm2, xmm3
  8146. vpxor xmm2, xmm2, xmm0
  8147. vpxor xmm2, xmm2, xmm1
  8148. vpxor xmm2, xmm2, xmm7
  8149. vpxor xmm4, xmm4, xmm2
  8150. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8151. ; Encrypt counter
  8152. vmovdqa xmm8, OWORD PTR [r15]
  8153. vpxor xmm8, xmm8, xmm4
  8154. vaesenc xmm8, xmm8, [r15+16]
  8155. vaesenc xmm8, xmm8, [r15+32]
  8156. vaesenc xmm8, xmm8, [r15+48]
  8157. vaesenc xmm8, xmm8, [r15+64]
  8158. vaesenc xmm8, xmm8, [r15+80]
  8159. vaesenc xmm8, xmm8, [r15+96]
  8160. vaesenc xmm8, xmm8, [r15+112]
  8161. vaesenc xmm8, xmm8, [r15+128]
  8162. vaesenc xmm8, xmm8, [r15+144]
  8163. cmp r10d, 11
  8164. vmovdqa xmm9, OWORD PTR [r15+160]
  8165. jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
  8166. vaesenc xmm8, xmm8, xmm9
  8167. vaesenc xmm8, xmm8, [r15+176]
  8168. cmp r10d, 13
  8169. vmovdqa xmm9, OWORD PTR [r15+192]
  8170. jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
  8171. vaesenc xmm8, xmm8, xmm9
  8172. vaesenc xmm8, xmm8, [r15+208]
  8173. vmovdqa xmm9, OWORD PTR [r15+224]
  8174. L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last:
  8175. vaesenclast xmm8, xmm8, xmm9
  8176. vmovdqu OWORD PTR [rsp+144], xmm8
  8177. L_AES_GCM_decrypt_avx1_iv_done:
  8178. ; Additional authentication data
  8179. mov edx, r11d
  8180. cmp edx, 0
  8181. je L_AES_GCM_decrypt_avx1_calc_aad_done
  8182. xor ecx, ecx
  8183. cmp edx, 16
  8184. jl L_AES_GCM_decrypt_avx1_calc_aad_lt16
  8185. and edx, 4294967280
  8186. L_AES_GCM_decrypt_avx1_calc_aad_16_loop:
  8187. vmovdqu xmm8, OWORD PTR [r12+rcx]
  8188. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8189. vpxor xmm6, xmm6, xmm8
  8190. ; ghash_gfmul_avx
  8191. vpshufd xmm1, xmm6, 78
  8192. vpshufd xmm2, xmm5, 78
  8193. vpclmulqdq xmm3, xmm5, xmm6, 17
  8194. vpclmulqdq xmm0, xmm5, xmm6, 0
  8195. vpxor xmm1, xmm1, xmm6
  8196. vpxor xmm2, xmm2, xmm5
  8197. vpclmulqdq xmm1, xmm1, xmm2, 0
  8198. vpxor xmm1, xmm1, xmm0
  8199. vpxor xmm1, xmm1, xmm3
  8200. vmovdqa xmm7, xmm0
  8201. vmovdqa xmm6, xmm3
  8202. vpslldq xmm2, xmm1, 8
  8203. vpsrldq xmm1, xmm1, 8
  8204. vpxor xmm7, xmm7, xmm2
  8205. vpxor xmm6, xmm6, xmm1
  8206. vpsrld xmm0, xmm7, 31
  8207. vpsrld xmm1, xmm6, 31
  8208. vpslld xmm7, xmm7, 1
  8209. vpslld xmm6, xmm6, 1
  8210. vpsrldq xmm2, xmm0, 12
  8211. vpslldq xmm0, xmm0, 4
  8212. vpslldq xmm1, xmm1, 4
  8213. vpor xmm6, xmm6, xmm2
  8214. vpor xmm7, xmm7, xmm0
  8215. vpor xmm6, xmm6, xmm1
  8216. vpslld xmm0, xmm7, 31
  8217. vpslld xmm1, xmm7, 30
  8218. vpslld xmm2, xmm7, 25
  8219. vpxor xmm0, xmm0, xmm1
  8220. vpxor xmm0, xmm0, xmm2
  8221. vmovdqa xmm1, xmm0
  8222. vpsrldq xmm1, xmm1, 4
  8223. vpslldq xmm0, xmm0, 12
  8224. vpxor xmm7, xmm7, xmm0
  8225. vpsrld xmm2, xmm7, 1
  8226. vpsrld xmm3, xmm7, 2
  8227. vpsrld xmm0, xmm7, 7
  8228. vpxor xmm2, xmm2, xmm3
  8229. vpxor xmm2, xmm2, xmm0
  8230. vpxor xmm2, xmm2, xmm1
  8231. vpxor xmm2, xmm2, xmm7
  8232. vpxor xmm6, xmm6, xmm2
  8233. add ecx, 16
  8234. cmp ecx, edx
  8235. jl L_AES_GCM_decrypt_avx1_calc_aad_16_loop
  8236. mov edx, r11d
  8237. cmp ecx, edx
  8238. je L_AES_GCM_decrypt_avx1_calc_aad_done
  8239. L_AES_GCM_decrypt_avx1_calc_aad_lt16:
  8240. sub rsp, 16
  8241. vpxor xmm8, xmm8, xmm8
  8242. xor ebx, ebx
  8243. vmovdqu OWORD PTR [rsp], xmm8
  8244. L_AES_GCM_decrypt_avx1_calc_aad_loop:
  8245. movzx r13d, BYTE PTR [r12+rcx]
  8246. mov BYTE PTR [rsp+rbx], r13b
  8247. inc ecx
  8248. inc ebx
  8249. cmp ecx, edx
  8250. jl L_AES_GCM_decrypt_avx1_calc_aad_loop
  8251. vmovdqu xmm8, OWORD PTR [rsp]
  8252. add rsp, 16
  8253. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8254. vpxor xmm6, xmm6, xmm8
  8255. ; ghash_gfmul_avx
  8256. vpshufd xmm1, xmm6, 78
  8257. vpshufd xmm2, xmm5, 78
  8258. vpclmulqdq xmm3, xmm5, xmm6, 17
  8259. vpclmulqdq xmm0, xmm5, xmm6, 0
  8260. vpxor xmm1, xmm1, xmm6
  8261. vpxor xmm2, xmm2, xmm5
  8262. vpclmulqdq xmm1, xmm1, xmm2, 0
  8263. vpxor xmm1, xmm1, xmm0
  8264. vpxor xmm1, xmm1, xmm3
  8265. vmovdqa xmm7, xmm0
  8266. vmovdqa xmm6, xmm3
  8267. vpslldq xmm2, xmm1, 8
  8268. vpsrldq xmm1, xmm1, 8
  8269. vpxor xmm7, xmm7, xmm2
  8270. vpxor xmm6, xmm6, xmm1
  8271. vpsrld xmm0, xmm7, 31
  8272. vpsrld xmm1, xmm6, 31
  8273. vpslld xmm7, xmm7, 1
  8274. vpslld xmm6, xmm6, 1
  8275. vpsrldq xmm2, xmm0, 12
  8276. vpslldq xmm0, xmm0, 4
  8277. vpslldq xmm1, xmm1, 4
  8278. vpor xmm6, xmm6, xmm2
  8279. vpor xmm7, xmm7, xmm0
  8280. vpor xmm6, xmm6, xmm1
  8281. vpslld xmm0, xmm7, 31
  8282. vpslld xmm1, xmm7, 30
  8283. vpslld xmm2, xmm7, 25
  8284. vpxor xmm0, xmm0, xmm1
  8285. vpxor xmm0, xmm0, xmm2
  8286. vmovdqa xmm1, xmm0
  8287. vpsrldq xmm1, xmm1, 4
  8288. vpslldq xmm0, xmm0, 12
  8289. vpxor xmm7, xmm7, xmm0
  8290. vpsrld xmm2, xmm7, 1
  8291. vpsrld xmm3, xmm7, 2
  8292. vpsrld xmm0, xmm7, 7
  8293. vpxor xmm2, xmm2, xmm3
  8294. vpxor xmm2, xmm2, xmm0
  8295. vpxor xmm2, xmm2, xmm1
  8296. vpxor xmm2, xmm2, xmm7
  8297. vpxor xmm6, xmm6, xmm2
  8298. L_AES_GCM_decrypt_avx1_calc_aad_done:
  8299. ; Calculate counter and H
  8300. vpsrlq xmm9, xmm5, 63
  8301. vpsllq xmm8, xmm5, 1
  8302. vpslldq xmm9, xmm9, 8
  8303. vpor xmm8, xmm8, xmm9
  8304. vpshufd xmm5, xmm5, 255
  8305. vpsrad xmm5, xmm5, 31
  8306. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8307. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  8308. vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
  8309. vpxor xmm5, xmm5, xmm8
  8310. vmovdqu OWORD PTR [rsp+128], xmm4
  8311. xor ebx, ebx
  8312. cmp r9d, 128
  8313. mov r13d, r9d
  8314. jl L_AES_GCM_decrypt_avx1_done_128
  8315. and r13d, 4294967168
  8316. vmovdqa xmm2, xmm6
  8317. ; H ^ 1
  8318. vmovdqu OWORD PTR [rsp], xmm5
  8319. ; H ^ 2
  8320. vpclmulqdq xmm8, xmm5, xmm5, 0
  8321. vpclmulqdq xmm0, xmm5, xmm5, 17
  8322. vpslld xmm12, xmm8, 31
  8323. vpslld xmm13, xmm8, 30
  8324. vpslld xmm14, xmm8, 25
  8325. vpxor xmm12, xmm12, xmm13
  8326. vpxor xmm12, xmm12, xmm14
  8327. vpsrldq xmm13, xmm12, 4
  8328. vpslldq xmm12, xmm12, 12
  8329. vpxor xmm8, xmm8, xmm12
  8330. vpsrld xmm14, xmm8, 1
  8331. vpsrld xmm10, xmm8, 2
  8332. vpsrld xmm9, xmm8, 7
  8333. vpxor xmm14, xmm14, xmm10
  8334. vpxor xmm14, xmm14, xmm9
  8335. vpxor xmm14, xmm14, xmm13
  8336. vpxor xmm14, xmm14, xmm8
  8337. vpxor xmm0, xmm0, xmm14
  8338. vmovdqu OWORD PTR [rsp+16], xmm0
  8339. ; H ^ 3
  8340. ; ghash_gfmul_red_avx
  8341. vpshufd xmm9, xmm5, 78
  8342. vpshufd xmm10, xmm0, 78
  8343. vpclmulqdq xmm11, xmm0, xmm5, 17
  8344. vpclmulqdq xmm8, xmm0, xmm5, 0
  8345. vpxor xmm9, xmm9, xmm5
  8346. vpxor xmm10, xmm10, xmm0
  8347. vpclmulqdq xmm9, xmm9, xmm10, 0
  8348. vpxor xmm9, xmm9, xmm8
  8349. vpxor xmm9, xmm9, xmm11
  8350. vpslldq xmm10, xmm9, 8
  8351. vpsrldq xmm9, xmm9, 8
  8352. vpxor xmm8, xmm8, xmm10
  8353. vpxor xmm1, xmm11, xmm9
  8354. vpslld xmm12, xmm8, 31
  8355. vpslld xmm13, xmm8, 30
  8356. vpslld xmm14, xmm8, 25
  8357. vpxor xmm12, xmm12, xmm13
  8358. vpxor xmm12, xmm12, xmm14
  8359. vpsrldq xmm13, xmm12, 4
  8360. vpslldq xmm12, xmm12, 12
  8361. vpxor xmm8, xmm8, xmm12
  8362. vpsrld xmm14, xmm8, 1
  8363. vpsrld xmm10, xmm8, 2
  8364. vpsrld xmm9, xmm8, 7
  8365. vpxor xmm14, xmm14, xmm10
  8366. vpxor xmm14, xmm14, xmm9
  8367. vpxor xmm14, xmm14, xmm13
  8368. vpxor xmm14, xmm14, xmm8
  8369. vpxor xmm1, xmm1, xmm14
  8370. vmovdqu OWORD PTR [rsp+32], xmm1
  8371. ; H ^ 4
  8372. vpclmulqdq xmm8, xmm0, xmm0, 0
  8373. vpclmulqdq xmm3, xmm0, xmm0, 17
  8374. vpslld xmm12, xmm8, 31
  8375. vpslld xmm13, xmm8, 30
  8376. vpslld xmm14, xmm8, 25
  8377. vpxor xmm12, xmm12, xmm13
  8378. vpxor xmm12, xmm12, xmm14
  8379. vpsrldq xmm13, xmm12, 4
  8380. vpslldq xmm12, xmm12, 12
  8381. vpxor xmm8, xmm8, xmm12
  8382. vpsrld xmm14, xmm8, 1
  8383. vpsrld xmm10, xmm8, 2
  8384. vpsrld xmm9, xmm8, 7
  8385. vpxor xmm14, xmm14, xmm10
  8386. vpxor xmm14, xmm14, xmm9
  8387. vpxor xmm14, xmm14, xmm13
  8388. vpxor xmm14, xmm14, xmm8
  8389. vpxor xmm3, xmm3, xmm14
  8390. vmovdqu OWORD PTR [rsp+48], xmm3
  8391. ; H ^ 5
  8392. ; ghash_gfmul_red_avx
  8393. vpshufd xmm9, xmm0, 78
  8394. vpshufd xmm10, xmm1, 78
  8395. vpclmulqdq xmm11, xmm1, xmm0, 17
  8396. vpclmulqdq xmm8, xmm1, xmm0, 0
  8397. vpxor xmm9, xmm9, xmm0
  8398. vpxor xmm10, xmm10, xmm1
  8399. vpclmulqdq xmm9, xmm9, xmm10, 0
  8400. vpxor xmm9, xmm9, xmm8
  8401. vpxor xmm9, xmm9, xmm11
  8402. vpslldq xmm10, xmm9, 8
  8403. vpsrldq xmm9, xmm9, 8
  8404. vpxor xmm8, xmm8, xmm10
  8405. vpxor xmm7, xmm11, xmm9
  8406. vpslld xmm12, xmm8, 31
  8407. vpslld xmm13, xmm8, 30
  8408. vpslld xmm14, xmm8, 25
  8409. vpxor xmm12, xmm12, xmm13
  8410. vpxor xmm12, xmm12, xmm14
  8411. vpsrldq xmm13, xmm12, 4
  8412. vpslldq xmm12, xmm12, 12
  8413. vpxor xmm8, xmm8, xmm12
  8414. vpsrld xmm14, xmm8, 1
  8415. vpsrld xmm10, xmm8, 2
  8416. vpsrld xmm9, xmm8, 7
  8417. vpxor xmm14, xmm14, xmm10
  8418. vpxor xmm14, xmm14, xmm9
  8419. vpxor xmm14, xmm14, xmm13
  8420. vpxor xmm14, xmm14, xmm8
  8421. vpxor xmm7, xmm7, xmm14
  8422. vmovdqu OWORD PTR [rsp+64], xmm7
  8423. ; H ^ 6
  8424. vpclmulqdq xmm8, xmm1, xmm1, 0
  8425. vpclmulqdq xmm7, xmm1, xmm1, 17
  8426. vpslld xmm12, xmm8, 31
  8427. vpslld xmm13, xmm8, 30
  8428. vpslld xmm14, xmm8, 25
  8429. vpxor xmm12, xmm12, xmm13
  8430. vpxor xmm12, xmm12, xmm14
  8431. vpsrldq xmm13, xmm12, 4
  8432. vpslldq xmm12, xmm12, 12
  8433. vpxor xmm8, xmm8, xmm12
  8434. vpsrld xmm14, xmm8, 1
  8435. vpsrld xmm10, xmm8, 2
  8436. vpsrld xmm9, xmm8, 7
  8437. vpxor xmm14, xmm14, xmm10
  8438. vpxor xmm14, xmm14, xmm9
  8439. vpxor xmm14, xmm14, xmm13
  8440. vpxor xmm14, xmm14, xmm8
  8441. vpxor xmm7, xmm7, xmm14
  8442. vmovdqu OWORD PTR [rsp+80], xmm7
  8443. ; H ^ 7
  8444. ; ghash_gfmul_red_avx
  8445. vpshufd xmm9, xmm1, 78
  8446. vpshufd xmm10, xmm3, 78
  8447. vpclmulqdq xmm11, xmm3, xmm1, 17
  8448. vpclmulqdq xmm8, xmm3, xmm1, 0
  8449. vpxor xmm9, xmm9, xmm1
  8450. vpxor xmm10, xmm10, xmm3
  8451. vpclmulqdq xmm9, xmm9, xmm10, 0
  8452. vpxor xmm9, xmm9, xmm8
  8453. vpxor xmm9, xmm9, xmm11
  8454. vpslldq xmm10, xmm9, 8
  8455. vpsrldq xmm9, xmm9, 8
  8456. vpxor xmm8, xmm8, xmm10
  8457. vpxor xmm7, xmm11, xmm9
  8458. vpslld xmm12, xmm8, 31
  8459. vpslld xmm13, xmm8, 30
  8460. vpslld xmm14, xmm8, 25
  8461. vpxor xmm12, xmm12, xmm13
  8462. vpxor xmm12, xmm12, xmm14
  8463. vpsrldq xmm13, xmm12, 4
  8464. vpslldq xmm12, xmm12, 12
  8465. vpxor xmm8, xmm8, xmm12
  8466. vpsrld xmm14, xmm8, 1
  8467. vpsrld xmm10, xmm8, 2
  8468. vpsrld xmm9, xmm8, 7
  8469. vpxor xmm14, xmm14, xmm10
  8470. vpxor xmm14, xmm14, xmm9
  8471. vpxor xmm14, xmm14, xmm13
  8472. vpxor xmm14, xmm14, xmm8
  8473. vpxor xmm7, xmm7, xmm14
  8474. vmovdqu OWORD PTR [rsp+96], xmm7
  8475. ; H ^ 8
  8476. vpclmulqdq xmm8, xmm3, xmm3, 0
  8477. vpclmulqdq xmm7, xmm3, xmm3, 17
  8478. vpslld xmm12, xmm8, 31
  8479. vpslld xmm13, xmm8, 30
  8480. vpslld xmm14, xmm8, 25
  8481. vpxor xmm12, xmm12, xmm13
  8482. vpxor xmm12, xmm12, xmm14
  8483. vpsrldq xmm13, xmm12, 4
  8484. vpslldq xmm12, xmm12, 12
  8485. vpxor xmm8, xmm8, xmm12
  8486. vpsrld xmm14, xmm8, 1
  8487. vpsrld xmm10, xmm8, 2
  8488. vpsrld xmm9, xmm8, 7
  8489. vpxor xmm14, xmm14, xmm10
  8490. vpxor xmm14, xmm14, xmm9
  8491. vpxor xmm14, xmm14, xmm13
  8492. vpxor xmm14, xmm14, xmm8
  8493. vpxor xmm7, xmm7, xmm14
  8494. vmovdqu OWORD PTR [rsp+112], xmm7
  8495. L_AES_GCM_decrypt_avx1_ghash_128:
  8496. lea rcx, QWORD PTR [rdi+rbx]
  8497. lea rdx, QWORD PTR [rsi+rbx]
  8498. vmovdqu xmm0, OWORD PTR [rsp+128]
  8499. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8500. vpshufb xmm8, xmm0, xmm1
  8501. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  8502. vpshufb xmm9, xmm9, xmm1
  8503. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  8504. vpshufb xmm10, xmm10, xmm1
  8505. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  8506. vpshufb xmm11, xmm11, xmm1
  8507. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  8508. vpshufb xmm12, xmm12, xmm1
  8509. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  8510. vpshufb xmm13, xmm13, xmm1
  8511. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  8512. vpshufb xmm14, xmm14, xmm1
  8513. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  8514. vpshufb xmm15, xmm15, xmm1
  8515. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  8516. vmovdqa xmm7, OWORD PTR [r15]
  8517. vmovdqu OWORD PTR [rsp+128], xmm0
  8518. vpxor xmm8, xmm8, xmm7
  8519. vpxor xmm9, xmm9, xmm7
  8520. vpxor xmm10, xmm10, xmm7
  8521. vpxor xmm11, xmm11, xmm7
  8522. vpxor xmm12, xmm12, xmm7
  8523. vpxor xmm13, xmm13, xmm7
  8524. vpxor xmm14, xmm14, xmm7
  8525. vpxor xmm15, xmm15, xmm7
  8526. vmovdqu xmm7, OWORD PTR [rsp+112]
  8527. vmovdqu xmm0, OWORD PTR [rcx]
  8528. vaesenc xmm8, xmm8, [r15+16]
  8529. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8530. vpxor xmm0, xmm0, xmm2
  8531. vpshufd xmm1, xmm7, 78
  8532. vpshufd xmm5, xmm0, 78
  8533. vpxor xmm1, xmm1, xmm7
  8534. vpxor xmm5, xmm5, xmm0
  8535. vpclmulqdq xmm3, xmm0, xmm7, 17
  8536. vaesenc xmm9, xmm9, [r15+16]
  8537. vaesenc xmm10, xmm10, [r15+16]
  8538. vpclmulqdq xmm2, xmm0, xmm7, 0
  8539. vaesenc xmm11, xmm11, [r15+16]
  8540. vaesenc xmm12, xmm12, [r15+16]
  8541. vpclmulqdq xmm1, xmm1, xmm5, 0
  8542. vaesenc xmm13, xmm13, [r15+16]
  8543. vaesenc xmm14, xmm14, [r15+16]
  8544. vaesenc xmm15, xmm15, [r15+16]
  8545. vpxor xmm1, xmm1, xmm2
  8546. vpxor xmm1, xmm1, xmm3
  8547. vmovdqu xmm7, OWORD PTR [rsp+96]
  8548. vmovdqu xmm0, OWORD PTR [rcx+16]
  8549. vpshufd xmm4, xmm7, 78
  8550. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8551. vaesenc xmm8, xmm8, [r15+32]
  8552. vpxor xmm4, xmm4, xmm7
  8553. vpshufd xmm5, xmm0, 78
  8554. vpxor xmm5, xmm5, xmm0
  8555. vpclmulqdq xmm6, xmm0, xmm7, 17
  8556. vaesenc xmm9, xmm9, [r15+32]
  8557. vaesenc xmm10, xmm10, [r15+32]
  8558. vpclmulqdq xmm7, xmm0, xmm7, 0
  8559. vaesenc xmm11, xmm11, [r15+32]
  8560. vaesenc xmm12, xmm12, [r15+32]
  8561. vpclmulqdq xmm4, xmm4, xmm5, 0
  8562. vaesenc xmm13, xmm13, [r15+32]
  8563. vaesenc xmm14, xmm14, [r15+32]
  8564. vaesenc xmm15, xmm15, [r15+32]
  8565. vpxor xmm1, xmm1, xmm7
  8566. vpxor xmm2, xmm2, xmm7
  8567. vpxor xmm1, xmm1, xmm6
  8568. vpxor xmm3, xmm3, xmm6
  8569. vpxor xmm1, xmm1, xmm4
  8570. vmovdqu xmm7, OWORD PTR [rsp+80]
  8571. vmovdqu xmm0, OWORD PTR [rcx+32]
  8572. vpshufd xmm4, xmm7, 78
  8573. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8574. vaesenc xmm8, xmm8, [r15+48]
  8575. vpxor xmm4, xmm4, xmm7
  8576. vpshufd xmm5, xmm0, 78
  8577. vpxor xmm5, xmm5, xmm0
  8578. vpclmulqdq xmm6, xmm0, xmm7, 17
  8579. vaesenc xmm9, xmm9, [r15+48]
  8580. vaesenc xmm10, xmm10, [r15+48]
  8581. vpclmulqdq xmm7, xmm0, xmm7, 0
  8582. vaesenc xmm11, xmm11, [r15+48]
  8583. vaesenc xmm12, xmm12, [r15+48]
  8584. vpclmulqdq xmm4, xmm4, xmm5, 0
  8585. vaesenc xmm13, xmm13, [r15+48]
  8586. vaesenc xmm14, xmm14, [r15+48]
  8587. vaesenc xmm15, xmm15, [r15+48]
  8588. vpxor xmm1, xmm1, xmm7
  8589. vpxor xmm2, xmm2, xmm7
  8590. vpxor xmm1, xmm1, xmm6
  8591. vpxor xmm3, xmm3, xmm6
  8592. vpxor xmm1, xmm1, xmm4
  8593. vmovdqu xmm7, OWORD PTR [rsp+64]
  8594. vmovdqu xmm0, OWORD PTR [rcx+48]
  8595. vpshufd xmm4, xmm7, 78
  8596. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8597. vaesenc xmm8, xmm8, [r15+64]
  8598. vpxor xmm4, xmm4, xmm7
  8599. vpshufd xmm5, xmm0, 78
  8600. vpxor xmm5, xmm5, xmm0
  8601. vpclmulqdq xmm6, xmm0, xmm7, 17
  8602. vaesenc xmm9, xmm9, [r15+64]
  8603. vaesenc xmm10, xmm10, [r15+64]
  8604. vpclmulqdq xmm7, xmm0, xmm7, 0
  8605. vaesenc xmm11, xmm11, [r15+64]
  8606. vaesenc xmm12, xmm12, [r15+64]
  8607. vpclmulqdq xmm4, xmm4, xmm5, 0
  8608. vaesenc xmm13, xmm13, [r15+64]
  8609. vaesenc xmm14, xmm14, [r15+64]
  8610. vaesenc xmm15, xmm15, [r15+64]
  8611. vpxor xmm1, xmm1, xmm7
  8612. vpxor xmm2, xmm2, xmm7
  8613. vpxor xmm1, xmm1, xmm6
  8614. vpxor xmm3, xmm3, xmm6
  8615. vpxor xmm1, xmm1, xmm4
  8616. vmovdqu xmm7, OWORD PTR [rsp+48]
  8617. vmovdqu xmm0, OWORD PTR [rcx+64]
  8618. vpshufd xmm4, xmm7, 78
  8619. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8620. vaesenc xmm8, xmm8, [r15+80]
  8621. vpxor xmm4, xmm4, xmm7
  8622. vpshufd xmm5, xmm0, 78
  8623. vpxor xmm5, xmm5, xmm0
  8624. vpclmulqdq xmm6, xmm0, xmm7, 17
  8625. vaesenc xmm9, xmm9, [r15+80]
  8626. vaesenc xmm10, xmm10, [r15+80]
  8627. vpclmulqdq xmm7, xmm0, xmm7, 0
  8628. vaesenc xmm11, xmm11, [r15+80]
  8629. vaesenc xmm12, xmm12, [r15+80]
  8630. vpclmulqdq xmm4, xmm4, xmm5, 0
  8631. vaesenc xmm13, xmm13, [r15+80]
  8632. vaesenc xmm14, xmm14, [r15+80]
  8633. vaesenc xmm15, xmm15, [r15+80]
  8634. vpxor xmm1, xmm1, xmm7
  8635. vpxor xmm2, xmm2, xmm7
  8636. vpxor xmm1, xmm1, xmm6
  8637. vpxor xmm3, xmm3, xmm6
  8638. vpxor xmm1, xmm1, xmm4
  8639. vmovdqu xmm7, OWORD PTR [rsp+32]
  8640. vmovdqu xmm0, OWORD PTR [rcx+80]
  8641. vpshufd xmm4, xmm7, 78
  8642. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8643. vaesenc xmm8, xmm8, [r15+96]
  8644. vpxor xmm4, xmm4, xmm7
  8645. vpshufd xmm5, xmm0, 78
  8646. vpxor xmm5, xmm5, xmm0
  8647. vpclmulqdq xmm6, xmm0, xmm7, 17
  8648. vaesenc xmm9, xmm9, [r15+96]
  8649. vaesenc xmm10, xmm10, [r15+96]
  8650. vpclmulqdq xmm7, xmm0, xmm7, 0
  8651. vaesenc xmm11, xmm11, [r15+96]
  8652. vaesenc xmm12, xmm12, [r15+96]
  8653. vpclmulqdq xmm4, xmm4, xmm5, 0
  8654. vaesenc xmm13, xmm13, [r15+96]
  8655. vaesenc xmm14, xmm14, [r15+96]
  8656. vaesenc xmm15, xmm15, [r15+96]
  8657. vpxor xmm1, xmm1, xmm7
  8658. vpxor xmm2, xmm2, xmm7
  8659. vpxor xmm1, xmm1, xmm6
  8660. vpxor xmm3, xmm3, xmm6
  8661. vpxor xmm1, xmm1, xmm4
  8662. vmovdqu xmm7, OWORD PTR [rsp+16]
  8663. vmovdqu xmm0, OWORD PTR [rcx+96]
  8664. vpshufd xmm4, xmm7, 78
  8665. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8666. vaesenc xmm8, xmm8, [r15+112]
  8667. vpxor xmm4, xmm4, xmm7
  8668. vpshufd xmm5, xmm0, 78
  8669. vpxor xmm5, xmm5, xmm0
  8670. vpclmulqdq xmm6, xmm0, xmm7, 17
  8671. vaesenc xmm9, xmm9, [r15+112]
  8672. vaesenc xmm10, xmm10, [r15+112]
  8673. vpclmulqdq xmm7, xmm0, xmm7, 0
  8674. vaesenc xmm11, xmm11, [r15+112]
  8675. vaesenc xmm12, xmm12, [r15+112]
  8676. vpclmulqdq xmm4, xmm4, xmm5, 0
  8677. vaesenc xmm13, xmm13, [r15+112]
  8678. vaesenc xmm14, xmm14, [r15+112]
  8679. vaesenc xmm15, xmm15, [r15+112]
  8680. vpxor xmm1, xmm1, xmm7
  8681. vpxor xmm2, xmm2, xmm7
  8682. vpxor xmm1, xmm1, xmm6
  8683. vpxor xmm3, xmm3, xmm6
  8684. vpxor xmm1, xmm1, xmm4
  8685. vmovdqu xmm7, OWORD PTR [rsp]
  8686. vmovdqu xmm0, OWORD PTR [rcx+112]
  8687. vpshufd xmm4, xmm7, 78
  8688. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8689. vaesenc xmm8, xmm8, [r15+128]
  8690. vpxor xmm4, xmm4, xmm7
  8691. vpshufd xmm5, xmm0, 78
  8692. vpxor xmm5, xmm5, xmm0
  8693. vpclmulqdq xmm6, xmm0, xmm7, 17
  8694. vaesenc xmm9, xmm9, [r15+128]
  8695. vaesenc xmm10, xmm10, [r15+128]
  8696. vpclmulqdq xmm7, xmm0, xmm7, 0
  8697. vaesenc xmm11, xmm11, [r15+128]
  8698. vaesenc xmm12, xmm12, [r15+128]
  8699. vpclmulqdq xmm4, xmm4, xmm5, 0
  8700. vaesenc xmm13, xmm13, [r15+128]
  8701. vaesenc xmm14, xmm14, [r15+128]
  8702. vaesenc xmm15, xmm15, [r15+128]
  8703. vpxor xmm1, xmm1, xmm7
  8704. vpxor xmm2, xmm2, xmm7
  8705. vpxor xmm1, xmm1, xmm6
  8706. vpxor xmm3, xmm3, xmm6
  8707. vpxor xmm1, xmm1, xmm4
  8708. vpslldq xmm5, xmm1, 8
  8709. vpsrldq xmm1, xmm1, 8
  8710. vaesenc xmm8, xmm8, [r15+144]
  8711. vpxor xmm2, xmm2, xmm5
  8712. vpxor xmm3, xmm3, xmm1
  8713. vaesenc xmm9, xmm9, [r15+144]
  8714. vpslld xmm7, xmm2, 31
  8715. vpslld xmm4, xmm2, 30
  8716. vpslld xmm5, xmm2, 25
  8717. vaesenc xmm10, xmm10, [r15+144]
  8718. vpxor xmm7, xmm7, xmm4
  8719. vpxor xmm7, xmm7, xmm5
  8720. vaesenc xmm11, xmm11, [r15+144]
  8721. vpsrldq xmm4, xmm7, 4
  8722. vpslldq xmm7, xmm7, 12
  8723. vaesenc xmm12, xmm12, [r15+144]
  8724. vpxor xmm2, xmm2, xmm7
  8725. vpsrld xmm5, xmm2, 1
  8726. vaesenc xmm13, xmm13, [r15+144]
  8727. vpsrld xmm1, xmm2, 2
  8728. vpsrld xmm0, xmm2, 7
  8729. vaesenc xmm14, xmm14, [r15+144]
  8730. vpxor xmm5, xmm5, xmm1
  8731. vpxor xmm5, xmm5, xmm0
  8732. vaesenc xmm15, xmm15, [r15+144]
  8733. vpxor xmm5, xmm5, xmm4
  8734. vpxor xmm2, xmm2, xmm5
  8735. vpxor xmm2, xmm2, xmm3
  8736. cmp r10d, 11
  8737. vmovdqa xmm7, OWORD PTR [r15+160]
  8738. jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
  8739. vaesenc xmm8, xmm8, xmm7
  8740. vaesenc xmm9, xmm9, xmm7
  8741. vaesenc xmm10, xmm10, xmm7
  8742. vaesenc xmm11, xmm11, xmm7
  8743. vaesenc xmm12, xmm12, xmm7
  8744. vaesenc xmm13, xmm13, xmm7
  8745. vaesenc xmm14, xmm14, xmm7
  8746. vaesenc xmm15, xmm15, xmm7
  8747. vmovdqa xmm7, OWORD PTR [r15+176]
  8748. vaesenc xmm8, xmm8, xmm7
  8749. vaesenc xmm9, xmm9, xmm7
  8750. vaesenc xmm10, xmm10, xmm7
  8751. vaesenc xmm11, xmm11, xmm7
  8752. vaesenc xmm12, xmm12, xmm7
  8753. vaesenc xmm13, xmm13, xmm7
  8754. vaesenc xmm14, xmm14, xmm7
  8755. vaesenc xmm15, xmm15, xmm7
  8756. cmp r10d, 13
  8757. vmovdqa xmm7, OWORD PTR [r15+192]
  8758. jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
  8759. vaesenc xmm8, xmm8, xmm7
  8760. vaesenc xmm9, xmm9, xmm7
  8761. vaesenc xmm10, xmm10, xmm7
  8762. vaesenc xmm11, xmm11, xmm7
  8763. vaesenc xmm12, xmm12, xmm7
  8764. vaesenc xmm13, xmm13, xmm7
  8765. vaesenc xmm14, xmm14, xmm7
  8766. vaesenc xmm15, xmm15, xmm7
  8767. vmovdqa xmm7, OWORD PTR [r15+208]
  8768. vaesenc xmm8, xmm8, xmm7
  8769. vaesenc xmm9, xmm9, xmm7
  8770. vaesenc xmm10, xmm10, xmm7
  8771. vaesenc xmm11, xmm11, xmm7
  8772. vaesenc xmm12, xmm12, xmm7
  8773. vaesenc xmm13, xmm13, xmm7
  8774. vaesenc xmm14, xmm14, xmm7
  8775. vaesenc xmm15, xmm15, xmm7
  8776. vmovdqa xmm7, OWORD PTR [r15+224]
  8777. L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done:
  8778. vaesenclast xmm8, xmm8, xmm7
  8779. vaesenclast xmm9, xmm9, xmm7
  8780. vmovdqu xmm0, OWORD PTR [rcx]
  8781. vmovdqu xmm1, OWORD PTR [rcx+16]
  8782. vpxor xmm8, xmm8, xmm0
  8783. vpxor xmm9, xmm9, xmm1
  8784. vmovdqu OWORD PTR [rdx], xmm8
  8785. vmovdqu OWORD PTR [rdx+16], xmm9
  8786. vaesenclast xmm10, xmm10, xmm7
  8787. vaesenclast xmm11, xmm11, xmm7
  8788. vmovdqu xmm0, OWORD PTR [rcx+32]
  8789. vmovdqu xmm1, OWORD PTR [rcx+48]
  8790. vpxor xmm10, xmm10, xmm0
  8791. vpxor xmm11, xmm11, xmm1
  8792. vmovdqu OWORD PTR [rdx+32], xmm10
  8793. vmovdqu OWORD PTR [rdx+48], xmm11
  8794. vaesenclast xmm12, xmm12, xmm7
  8795. vaesenclast xmm13, xmm13, xmm7
  8796. vmovdqu xmm0, OWORD PTR [rcx+64]
  8797. vmovdqu xmm1, OWORD PTR [rcx+80]
  8798. vpxor xmm12, xmm12, xmm0
  8799. vpxor xmm13, xmm13, xmm1
  8800. vmovdqu OWORD PTR [rdx+64], xmm12
  8801. vmovdqu OWORD PTR [rdx+80], xmm13
  8802. vaesenclast xmm14, xmm14, xmm7
  8803. vaesenclast xmm15, xmm15, xmm7
  8804. vmovdqu xmm0, OWORD PTR [rcx+96]
  8805. vmovdqu xmm1, OWORD PTR [rcx+112]
  8806. vpxor xmm14, xmm14, xmm0
  8807. vpxor xmm15, xmm15, xmm1
  8808. vmovdqu OWORD PTR [rdx+96], xmm14
  8809. vmovdqu OWORD PTR [rdx+112], xmm15
  8810. add ebx, 128
  8811. cmp ebx, r13d
  8812. jl L_AES_GCM_decrypt_avx1_ghash_128
  8813. vmovdqa xmm6, xmm2
  8814. vmovdqu xmm5, OWORD PTR [rsp]
  8815. L_AES_GCM_decrypt_avx1_done_128:
  8816. mov edx, r9d
  8817. cmp ebx, edx
  8818. jge L_AES_GCM_decrypt_avx1_done_dec
  8819. mov r13d, r9d
  8820. and r13d, 4294967280
  8821. cmp ebx, r13d
  8822. jge L_AES_GCM_decrypt_avx1_last_block_done
  8823. L_AES_GCM_decrypt_avx1_last_block_start:
  8824. vmovdqu xmm13, OWORD PTR [rdi+rbx]
  8825. vmovdqa xmm0, xmm5
  8826. vpshufb xmm1, xmm13, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8827. vpxor xmm1, xmm1, xmm6
  8828. vmovdqu xmm9, OWORD PTR [rsp+128]
  8829. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8830. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  8831. vmovdqu OWORD PTR [rsp+128], xmm9
  8832. vpxor xmm8, xmm8, [r15]
  8833. vpclmulqdq xmm10, xmm1, xmm0, 16
  8834. vaesenc xmm8, xmm8, [r15+16]
  8835. vaesenc xmm8, xmm8, [r15+32]
  8836. vpclmulqdq xmm11, xmm1, xmm0, 1
  8837. vaesenc xmm8, xmm8, [r15+48]
  8838. vaesenc xmm8, xmm8, [r15+64]
  8839. vpclmulqdq xmm12, xmm1, xmm0, 0
  8840. vaesenc xmm8, xmm8, [r15+80]
  8841. vpclmulqdq xmm1, xmm1, xmm0, 17
  8842. vaesenc xmm8, xmm8, [r15+96]
  8843. vpxor xmm10, xmm10, xmm11
  8844. vpslldq xmm2, xmm10, 8
  8845. vpsrldq xmm10, xmm10, 8
  8846. vaesenc xmm8, xmm8, [r15+112]
  8847. vpxor xmm2, xmm2, xmm12
  8848. vpxor xmm3, xmm1, xmm10
  8849. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  8850. vpclmulqdq xmm11, xmm2, xmm0, 16
  8851. vaesenc xmm8, xmm8, [r15+128]
  8852. vpshufd xmm10, xmm2, 78
  8853. vpxor xmm10, xmm10, xmm11
  8854. vpclmulqdq xmm11, xmm10, xmm0, 16
  8855. vaesenc xmm8, xmm8, [r15+144]
  8856. vpshufd xmm10, xmm10, 78
  8857. vpxor xmm10, xmm10, xmm11
  8858. vpxor xmm6, xmm10, xmm3
  8859. cmp r10d, 11
  8860. vmovdqa xmm9, OWORD PTR [r15+160]
  8861. jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
  8862. vaesenc xmm8, xmm8, xmm9
  8863. vaesenc xmm8, xmm8, [r15+176]
  8864. cmp r10d, 13
  8865. vmovdqa xmm9, OWORD PTR [r15+192]
  8866. jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
  8867. vaesenc xmm8, xmm8, xmm9
  8868. vaesenc xmm8, xmm8, [r15+208]
  8869. vmovdqa xmm9, OWORD PTR [r15+224]
  8870. L_AES_GCM_decrypt_avx1_aesenc_gfmul_last:
  8871. vaesenclast xmm8, xmm8, xmm9
  8872. vmovdqa xmm0, xmm13
  8873. vpxor xmm8, xmm8, xmm0
  8874. vmovdqu OWORD PTR [rsi+rbx], xmm8
  8875. add ebx, 16
  8876. cmp ebx, r13d
  8877. jl L_AES_GCM_decrypt_avx1_last_block_start
  8878. L_AES_GCM_decrypt_avx1_last_block_done:
  8879. mov ecx, r9d
  8880. mov edx, ecx
  8881. and ecx, 15
  8882. jz L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done
  8883. vmovdqu xmm4, OWORD PTR [rsp+128]
  8884. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8885. vpxor xmm4, xmm4, [r15]
  8886. vaesenc xmm4, xmm4, [r15+16]
  8887. vaesenc xmm4, xmm4, [r15+32]
  8888. vaesenc xmm4, xmm4, [r15+48]
  8889. vaesenc xmm4, xmm4, [r15+64]
  8890. vaesenc xmm4, xmm4, [r15+80]
  8891. vaesenc xmm4, xmm4, [r15+96]
  8892. vaesenc xmm4, xmm4, [r15+112]
  8893. vaesenc xmm4, xmm4, [r15+128]
  8894. vaesenc xmm4, xmm4, [r15+144]
  8895. cmp r10d, 11
  8896. vmovdqa xmm9, OWORD PTR [r15+160]
  8897. jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
  8898. vaesenc xmm4, xmm4, xmm9
  8899. vaesenc xmm4, xmm4, [r15+176]
  8900. cmp r10d, 13
  8901. vmovdqa xmm9, OWORD PTR [r15+192]
  8902. jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
  8903. vaesenc xmm4, xmm4, xmm9
  8904. vaesenc xmm4, xmm4, [r15+208]
  8905. vmovdqa xmm9, OWORD PTR [r15+224]
  8906. L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last:
  8907. vaesenclast xmm4, xmm4, xmm9
  8908. sub rsp, 32
  8909. xor ecx, ecx
  8910. vmovdqu OWORD PTR [rsp], xmm4
  8911. vpxor xmm0, xmm0, xmm0
  8912. vmovdqu OWORD PTR [rsp+16], xmm0
  8913. L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop:
  8914. movzx r13d, BYTE PTR [rdi+rbx]
  8915. mov BYTE PTR [rsp+rcx+16], r13b
  8916. xor r13b, BYTE PTR [rsp+rcx]
  8917. mov BYTE PTR [rsi+rbx], r13b
  8918. inc ebx
  8919. inc ecx
  8920. cmp ebx, edx
  8921. jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop
  8922. vmovdqu xmm4, OWORD PTR [rsp+16]
  8923. add rsp, 32
  8924. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8925. vpxor xmm6, xmm6, xmm4
  8926. ; ghash_gfmul_red_avx
  8927. vpshufd xmm9, xmm5, 78
  8928. vpshufd xmm10, xmm6, 78
  8929. vpclmulqdq xmm11, xmm6, xmm5, 17
  8930. vpclmulqdq xmm8, xmm6, xmm5, 0
  8931. vpxor xmm9, xmm9, xmm5
  8932. vpxor xmm10, xmm10, xmm6
  8933. vpclmulqdq xmm9, xmm9, xmm10, 0
  8934. vpxor xmm9, xmm9, xmm8
  8935. vpxor xmm9, xmm9, xmm11
  8936. vpslldq xmm10, xmm9, 8
  8937. vpsrldq xmm9, xmm9, 8
  8938. vpxor xmm8, xmm8, xmm10
  8939. vpxor xmm6, xmm11, xmm9
  8940. vpslld xmm12, xmm8, 31
  8941. vpslld xmm13, xmm8, 30
  8942. vpslld xmm14, xmm8, 25
  8943. vpxor xmm12, xmm12, xmm13
  8944. vpxor xmm12, xmm12, xmm14
  8945. vpsrldq xmm13, xmm12, 4
  8946. vpslldq xmm12, xmm12, 12
  8947. vpxor xmm8, xmm8, xmm12
  8948. vpsrld xmm14, xmm8, 1
  8949. vpsrld xmm10, xmm8, 2
  8950. vpsrld xmm9, xmm8, 7
  8951. vpxor xmm14, xmm14, xmm10
  8952. vpxor xmm14, xmm14, xmm9
  8953. vpxor xmm14, xmm14, xmm13
  8954. vpxor xmm14, xmm14, xmm8
  8955. vpxor xmm6, xmm6, xmm14
  8956. L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done:
  8957. L_AES_GCM_decrypt_avx1_done_dec:
  8958. mov edx, r9d
  8959. mov ecx, r11d
  8960. shl rdx, 3
  8961. shl rcx, 3
  8962. vmovq xmm0, rdx
  8963. vmovq xmm1, rcx
  8964. vpunpcklqdq xmm0, xmm0, xmm1
  8965. vpxor xmm6, xmm6, xmm0
  8966. ; ghash_gfmul_red_avx
  8967. vpshufd xmm9, xmm5, 78
  8968. vpshufd xmm10, xmm6, 78
  8969. vpclmulqdq xmm11, xmm6, xmm5, 17
  8970. vpclmulqdq xmm8, xmm6, xmm5, 0
  8971. vpxor xmm9, xmm9, xmm5
  8972. vpxor xmm10, xmm10, xmm6
  8973. vpclmulqdq xmm9, xmm9, xmm10, 0
  8974. vpxor xmm9, xmm9, xmm8
  8975. vpxor xmm9, xmm9, xmm11
  8976. vpslldq xmm10, xmm9, 8
  8977. vpsrldq xmm9, xmm9, 8
  8978. vpxor xmm8, xmm8, xmm10
  8979. vpxor xmm6, xmm11, xmm9
  8980. vpslld xmm12, xmm8, 31
  8981. vpslld xmm13, xmm8, 30
  8982. vpslld xmm14, xmm8, 25
  8983. vpxor xmm12, xmm12, xmm13
  8984. vpxor xmm12, xmm12, xmm14
  8985. vpsrldq xmm13, xmm12, 4
  8986. vpslldq xmm12, xmm12, 12
  8987. vpxor xmm8, xmm8, xmm12
  8988. vpsrld xmm14, xmm8, 1
  8989. vpsrld xmm10, xmm8, 2
  8990. vpsrld xmm9, xmm8, 7
  8991. vpxor xmm14, xmm14, xmm10
  8992. vpxor xmm14, xmm14, xmm9
  8993. vpxor xmm14, xmm14, xmm13
  8994. vpxor xmm14, xmm14, xmm8
  8995. vpxor xmm6, xmm6, xmm14
  8996. vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8997. vmovdqu xmm0, OWORD PTR [rsp+144]
  8998. vpxor xmm0, xmm0, xmm6
  8999. cmp r14d, 16
  9000. je L_AES_GCM_decrypt_avx1_cmp_tag_16
  9001. sub rsp, 16
  9002. xor rcx, rcx
  9003. xor rbx, rbx
  9004. vmovdqu OWORD PTR [rsp], xmm0
  9005. L_AES_GCM_decrypt_avx1_cmp_tag_loop:
  9006. movzx r13d, BYTE PTR [rsp+rcx]
  9007. xor r13b, BYTE PTR [r8+rcx]
  9008. or bl, r13b
  9009. inc ecx
  9010. cmp ecx, r14d
  9011. jne L_AES_GCM_decrypt_avx1_cmp_tag_loop
  9012. cmp rbx, 0
  9013. sete bl
  9014. add rsp, 16
  9015. xor rcx, rcx
  9016. jmp L_AES_GCM_decrypt_avx1_cmp_tag_done
  9017. L_AES_GCM_decrypt_avx1_cmp_tag_16:
  9018. vmovdqu xmm1, OWORD PTR [r8]
  9019. vpcmpeqb xmm0, xmm0, xmm1
  9020. vpmovmskb rdx, xmm0
  9021. ; If edx == 0xFFFF (all 16 compared tag bytes are equal) the result is 1, otherwise 0
  9022. xor ebx, ebx
  9023. cmp edx, 65535
  9024. sete bl
  9025. L_AES_GCM_decrypt_avx1_cmp_tag_done:
  9026. mov DWORD PTR [rbp], ebx
  9027. vzeroupper
  9028. vmovdqu xmm6, OWORD PTR [rsp+168]
  9029. vmovdqu xmm7, OWORD PTR [rsp+184]
  9030. vmovdqu xmm8, OWORD PTR [rsp+200]
  9031. vmovdqu xmm9, OWORD PTR [rsp+216]
  9032. vmovdqu xmm10, OWORD PTR [rsp+232]
  9033. vmovdqu xmm11, OWORD PTR [rsp+248]
  9034. vmovdqu xmm12, OWORD PTR [rsp+264]
  9035. vmovdqu xmm13, OWORD PTR [rsp+280]
  9036. vmovdqu xmm14, OWORD PTR [rsp+296]
  9037. vmovdqu xmm15, OWORD PTR [rsp+312]
  9038. add rsp, 328
  9039. pop rbp
  9040. pop r15
  9041. pop r14
  9042. pop rbx
  9043. pop r12
  9044. pop rsi
  9045. pop rdi
  9046. pop r13
  9047. ret
  9048. AES_GCM_decrypt_avx1 ENDP
  9049. _text ENDS
  9050. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_init_avx1
  ;
  ; From an expanded AES key and an IV, computes three 16-byte values and
  ; stores them through caller-supplied pointers:
  ;   H       = AES encryption of the all-zero block, then shuffled with
  ;             L_avx1_aes_gcm_bswap_mask
  ;   T       = AES encryption of the initial counter block
  ;   counter = initial counter block shuffled with L_avx1_aes_gcm_bswap_epi64
  ;             and incremented with L_avx1_aes_gcm_one
  ; (The L_avx1_aes_gcm_* constants are defined outside this excerpt;
  ;  presumably byte-order shuffles and a counter increment -- confirm there.)
  ;
  ; Arguments as consumed by the prologue (register/stack layout matches the
  ; Microsoft x64 convention):
  ;   rcx       -> round keys, 16 bytes each; read with vmovdqa, so the table
  ;                must be 16-byte aligned
  ;   edx       =  round count: < 11 uses 10 rounds, < 13 uses 12, otherwise 14
  ;   r8        -> IV bytes
  ;   r9d       =  IV length in bytes
  ;   [rsp+40]  -> receives H        (written with vmovdqa: 16-byte aligned)
  ;   [rsp+48]  -> receives counter  (written with vmovdqa: 16-byte aligned)
  ;   [rsp+56]  -> receives T        (written with vmovdqa: 16-byte aligned)
  ;   (stack offsets are relative to rsp at entry)
  ;
  ; Register roles after the prologue:
  ;   rdi = round keys      esi  = round count
  ;   r10 = IV pointer      r11d = IV length (edx is a working copy)
  ;   rax / r8 / r9 = output pointers for H / counter / T
  ;   xmm5 = H              xmm4 = counter block or GHASH accumulator
  ;
  ; Preserved: rdi, rsi, r12, r13 (pushed) and xmm6, xmm7, xmm8, xmm15 (spilled
  ; to the local frame). Other registers written here: rax, rcx, rdx, r8-r11,
  ; xmm0-xmm4, xmm5, flags. Makes no calls.
  ;
  ; NOTE(review): the IV length is compared with signed jumps (jl). A length
  ; with bit 31 set would be treated as negative -- confirm callers never pass
  ; such a value.
  ; ---------------------------------------------------------------------------
  9051. AES_GCM_init_avx1 PROC
  9052. push rdi
  9053. push rsi
  9054. push r12
  9055. push r13
  9056. mov rdi, rcx
  9057. mov rsi, rdx
  9058. mov r10, r8
  9059. mov r11d, r9d
  ; Stack arguments 5..7: four pushes (32 bytes) moved them from entry offsets
  ; 40/48/56 to 72/80/88.
  9060. mov rax, QWORD PTR [rsp+72]
  9061. mov r8, QWORD PTR [rsp+80]
  9062. mov r9, QWORD PTR [rsp+88]
  ; 80-byte local frame; only offsets 16..79 are used, as spill slots for the
  ; four non-volatile xmm registers this routine overwrites.
  9063. sub rsp, 80
  9064. vmovdqu OWORD PTR [rsp+16], xmm6
  9065. vmovdqu OWORD PTR [rsp+32], xmm7
  9066. vmovdqu OWORD PTR [rsp+48], xmm8
  9067. vmovdqu OWORD PTR [rsp+64], xmm15
  ; xmm4 = 0: start value of the counter block / GHASH accumulator.
  9068. vpxor xmm4, xmm4, xmm4
  9069. mov edx, r11d
  9070. cmp edx, 12
  9071. jne L_AES_GCM_init_avx1_iv_not_12
  9072. ; Calculate values when IV is 12 bytes
  9073. ; Set counter based on IV
  ; 16777216 = 0x01000000; placed in dword 3 it yields bytes 00 00 00 01 in
  ; memory order, so xmm4 = IV[0..11] followed by 00 00 00 01.
  9074. mov ecx, 16777216
  9075. vmovq xmm4, QWORD PTR [r10]
  9076. vpinsrd xmm4, xmm4, DWORD PTR [r10+8], 2
  9077. vpinsrd xmm4, xmm4, ecx, 3
  9078. ; H = Encrypt X(=0) and T = Encrypt counter
  ; Two blocks go through the cipher side by side:
  ;   xmm5 = 0 xor round key 0        (the zero block, becomes H)
  ;   xmm1 = counter xor round key 0  (becomes T)
  ; xmm6 carries the current round key.
  9079. vmovdqa xmm5, OWORD PTR [rdi]
  9080. vpxor xmm1, xmm4, xmm5
  9081. vmovdqa xmm6, OWORD PTR [rdi+16]
  9082. vaesenc xmm5, xmm5, xmm6
  9083. vaesenc xmm1, xmm1, xmm6
  9084. vmovdqa xmm6, OWORD PTR [rdi+32]
  9085. vaesenc xmm5, xmm5, xmm6
  9086. vaesenc xmm1, xmm1, xmm6
  9087. vmovdqa xmm6, OWORD PTR [rdi+48]
  9088. vaesenc xmm5, xmm5, xmm6
  9089. vaesenc xmm1, xmm1, xmm6
  9090. vmovdqa xmm6, OWORD PTR [rdi+64]
  9091. vaesenc xmm5, xmm5, xmm6
  9092. vaesenc xmm1, xmm1, xmm6
  9093. vmovdqa xmm6, OWORD PTR [rdi+80]
  9094. vaesenc xmm5, xmm5, xmm6
  9095. vaesenc xmm1, xmm1, xmm6
  9096. vmovdqa xmm6, OWORD PTR [rdi+96]
  9097. vaesenc xmm5, xmm5, xmm6
  9098. vaesenc xmm1, xmm1, xmm6
  9099. vmovdqa xmm6, OWORD PTR [rdi+112]
  9100. vaesenc xmm5, xmm5, xmm6
  9101. vaesenc xmm1, xmm1, xmm6
  9102. vmovdqa xmm6, OWORD PTR [rdi+128]
  9103. vaesenc xmm5, xmm5, xmm6
  9104. vaesenc xmm1, xmm1, xmm6
  9105. vmovdqa xmm6, OWORD PTR [rdi+144]
  9106. vaesenc xmm5, xmm5, xmm6
  9107. vaesenc xmm1, xmm1, xmm6
  ; Round-count dispatch. The vmovdqa between cmp and jl does not touch flags;
  ; it preloads the key that is the final-round key if the jump is taken.
  9108. cmp esi, 11
  9109. vmovdqa xmm6, OWORD PTR [rdi+160]
  9110. jl L_AES_GCM_init_avx1_calc_iv_12_last
  9111. vaesenc xmm5, xmm5, xmm6
  9112. vaesenc xmm1, xmm1, xmm6
  9113. vmovdqa xmm6, OWORD PTR [rdi+176]
  9114. vaesenc xmm5, xmm5, xmm6
  9115. vaesenc xmm1, xmm1, xmm6
  9116. cmp esi, 13
  9117. vmovdqa xmm6, OWORD PTR [rdi+192]
  9118. jl L_AES_GCM_init_avx1_calc_iv_12_last
  9119. vaesenc xmm5, xmm5, xmm6
  9120. vaesenc xmm1, xmm1, xmm6
  9121. vmovdqa xmm6, OWORD PTR [rdi+208]
  9122. vaesenc xmm5, xmm5, xmm6
  9123. vaesenc xmm1, xmm1, xmm6
  9124. vmovdqa xmm6, OWORD PTR [rdi+224]
  9125. L_AES_GCM_init_avx1_calc_iv_12_last:
  ; xmm6 = last round key for the selected round count.
  9126. vaesenclast xmm5, xmm5, xmm6
  9127. vaesenclast xmm1, xmm1, xmm6
  9128. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  ; xmm15 = T; xmm4 still holds the unencrypted counter block for iv_done.
  9129. vmovdqu xmm15, xmm1
  9130. jmp L_AES_GCM_init_avx1_iv_done
  9131. L_AES_GCM_init_avx1_iv_not_12:
  9132. ; Calculate values when IV is not 12 bytes
  9133. ; H = Encrypt X(=0)
  ; The zero block xor round key 0 is round key 0 itself, so it is loaded
  ; directly as the initial state.
  9134. vmovdqa xmm5, OWORD PTR [rdi]
  9135. vaesenc xmm5, xmm5, [rdi+16]
  9136. vaesenc xmm5, xmm5, [rdi+32]
  9137. vaesenc xmm5, xmm5, [rdi+48]
  9138. vaesenc xmm5, xmm5, [rdi+64]
  9139. vaesenc xmm5, xmm5, [rdi+80]
  9140. vaesenc xmm5, xmm5, [rdi+96]
  9141. vaesenc xmm5, xmm5, [rdi+112]
  9142. vaesenc xmm5, xmm5, [rdi+128]
  9143. vaesenc xmm5, xmm5, [rdi+144]
  ; Same round-count dispatch as the 12-byte path, with xmm8 as the preloaded key.
  9144. cmp esi, 11
  9145. vmovdqa xmm8, OWORD PTR [rdi+160]
  9146. jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last
  9147. vaesenc xmm5, xmm5, xmm8
  9148. vaesenc xmm5, xmm5, [rdi+176]
  9149. cmp esi, 13
  9150. vmovdqa xmm8, OWORD PTR [rdi+192]
  9151. jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last
  9152. vaesenc xmm5, xmm5, xmm8
  9153. vaesenc xmm5, xmm5, [rdi+208]
  9154. vmovdqa xmm8, OWORD PTR [rdi+224]
  9155. L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last:
  9156. vaesenclast xmm5, xmm5, xmm8
  9157. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9158. ; Calc counter
  9159. ; Initialization vector
  ; The counter block is derived by hashing the IV with H: xmm4 (zero so far)
  ; accumulates, rcx is the byte offset into the IV.
  ; "mov rcx, 0" sits between cmp and je on purpose: mov leaves the flags
  ; intact, so je still tests "IV length == 0" (an xor here would not).
  9160. cmp edx, 0
  9161. mov rcx, 0
  9162. je L_AES_GCM_init_avx1_calc_iv_done
  9163. cmp edx, 16
  9164. jl L_AES_GCM_init_avx1_calc_iv_lt16
  ; edx = IV length rounded down to a multiple of 16 (4294967280 = 0xFFFFFFF0).
  9165. and edx, 4294967280
  9166. L_AES_GCM_init_avx1_calc_iv_16_loop:
  ; Per full 16-byte IV block: xmm4 = (xmm4 xor shuffled block) * H.
  9167. vmovdqu xmm7, OWORD PTR [r10+rcx]
  9168. vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9169. vpxor xmm4, xmm4, xmm7
  9170. ; ghash_gfmul_avx
  ; Carry-less 128x128 multiply of xmm4 by xmm5 with three vpclmulqdq
  ; (Karatsuba): xmm3 = high*high (imm 17), xmm0 = low*low (imm 0),
  ; xmm1 = (hi^lo of xmm4) * (hi^lo of xmm5). vpshufd ..., 78 swaps the two
  ; 64-bit halves so the hi^lo sums land in the low qword.
  9171. vpshufd xmm1, xmm4, 78
  9172. vpshufd xmm2, xmm5, 78
  9173. vpclmulqdq xmm3, xmm5, xmm4, 17
  9174. vpclmulqdq xmm0, xmm5, xmm4, 0
  9175. vpxor xmm1, xmm1, xmm4
  9176. vpxor xmm2, xmm2, xmm5
  9177. vpclmulqdq xmm1, xmm1, xmm2, 0
  ; Middle term = xmm1 ^ low ^ high; its halves are folded into the 256-bit
  ; product held as xmm4 (upper 128 bits) : xmm6 (lower 128 bits).
  9178. vpxor xmm1, xmm1, xmm0
  9179. vpxor xmm1, xmm1, xmm3
  9180. vmovdqa xmm6, xmm0
  9181. vmovdqa xmm4, xmm3
  9182. vpslldq xmm2, xmm1, 8
  9183. vpsrldq xmm1, xmm1, 8
  9184. vpxor xmm6, xmm6, xmm2
  9185. vpxor xmm4, xmm4, xmm1
  ; Shift the whole 256-bit product left by one bit: vpsrld 31 isolates the
  ; top bit of every dword, the byte shifts carry it into the next dword
  ; (and from xmm6's top dword into xmm4's bottom dword via xmm2).
  9186. vpsrld xmm0, xmm6, 31
  9187. vpsrld xmm1, xmm4, 31
  9188. vpslld xmm6, xmm6, 1
  9189. vpslld xmm4, xmm4, 1
  9190. vpsrldq xmm2, xmm0, 12
  9191. vpslldq xmm0, xmm0, 4
  9192. vpslldq xmm1, xmm1, 4
  9193. vpor xmm4, xmm4, xmm2
  9194. vpor xmm6, xmm6, xmm0
  9195. vpor xmm4, xmm4, xmm1
  ; Reduce 256 -> 128 bits: the lower half (xmm6) is folded into the upper
  ; half (xmm4) using shifts by 31/30/25 and then 1/2/7.
  ; (Presumably reduction modulo the GCM field polynomial -- confirm against
  ;  the GHASH reference.)
  9196. vpslld xmm0, xmm6, 31
  9197. vpslld xmm1, xmm6, 30
  9198. vpslld xmm2, xmm6, 25
  9199. vpxor xmm0, xmm0, xmm1
  9200. vpxor xmm0, xmm0, xmm2
  9201. vmovdqa xmm1, xmm0
  9202. vpsrldq xmm1, xmm1, 4
  9203. vpslldq xmm0, xmm0, 12
  9204. vpxor xmm6, xmm6, xmm0
  9205. vpsrld xmm2, xmm6, 1
  9206. vpsrld xmm3, xmm6, 2
  9207. vpsrld xmm0, xmm6, 7
  9208. vpxor xmm2, xmm2, xmm3
  9209. vpxor xmm2, xmm2, xmm0
  9210. vpxor xmm2, xmm2, xmm1
  9211. vpxor xmm2, xmm2, xmm6
  9212. vpxor xmm4, xmm4, xmm2
  9213. add ecx, 16
  9214. cmp ecx, edx
  9215. jl L_AES_GCM_init_avx1_calc_iv_16_loop
  ; Restore the true IV length; if the loop consumed all of it there is no
  ; partial block.
  9216. mov edx, r11d
  9217. cmp ecx, edx
  9218. je L_AES_GCM_init_avx1_calc_iv_done
  9219. L_AES_GCM_init_avx1_calc_iv_lt16:
  ; Trailing (or only) partial block: copy IV bytes ecx..edx-1 into a zeroed
  ; 16-byte stack buffer so the block is zero-padded, then hash it like a
  ; full block. r13d indexes the buffer, r12b carries each byte.
  9220. sub rsp, 16
  9221. vpxor xmm7, xmm7, xmm7
  9222. xor r13d, r13d
  9223. vmovdqu OWORD PTR [rsp], xmm7
  9224. L_AES_GCM_init_avx1_calc_iv_loop:
  9225. movzx r12d, BYTE PTR [r10+rcx]
  9226. mov BYTE PTR [rsp+r13], r12b
  9227. inc ecx
  9228. inc r13d
  9229. cmp ecx, edx
  9230. jl L_AES_GCM_init_avx1_calc_iv_loop
  9231. vmovdqu xmm7, OWORD PTR [rsp]
  9232. add rsp, 16
  9233. vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9234. vpxor xmm4, xmm4, xmm7
  9235. ; ghash_gfmul_avx
  ; Same multiply / shift-left-by-one / reduce sequence as in the 16-byte
  ; loop: xmm4 = xmm4 * xmm5 (H).
  9236. vpshufd xmm1, xmm4, 78
  9237. vpshufd xmm2, xmm5, 78
  9238. vpclmulqdq xmm3, xmm5, xmm4, 17
  9239. vpclmulqdq xmm0, xmm5, xmm4, 0
  9240. vpxor xmm1, xmm1, xmm4
  9241. vpxor xmm2, xmm2, xmm5
  9242. vpclmulqdq xmm1, xmm1, xmm2, 0
  9243. vpxor xmm1, xmm1, xmm0
  9244. vpxor xmm1, xmm1, xmm3
  9245. vmovdqa xmm6, xmm0
  9246. vmovdqa xmm4, xmm3
  9247. vpslldq xmm2, xmm1, 8
  9248. vpsrldq xmm1, xmm1, 8
  9249. vpxor xmm6, xmm6, xmm2
  9250. vpxor xmm4, xmm4, xmm1
  9251. vpsrld xmm0, xmm6, 31
  9252. vpsrld xmm1, xmm4, 31
  9253. vpslld xmm6, xmm6, 1
  9254. vpslld xmm4, xmm4, 1
  9255. vpsrldq xmm2, xmm0, 12
  9256. vpslldq xmm0, xmm0, 4
  9257. vpslldq xmm1, xmm1, 4
  9258. vpor xmm4, xmm4, xmm2
  9259. vpor xmm6, xmm6, xmm0
  9260. vpor xmm4, xmm4, xmm1
  9261. vpslld xmm0, xmm6, 31
  9262. vpslld xmm1, xmm6, 30
  9263. vpslld xmm2, xmm6, 25
  9264. vpxor xmm0, xmm0, xmm1
  9265. vpxor xmm0, xmm0, xmm2
  9266. vmovdqa xmm1, xmm0
  9267. vpsrldq xmm1, xmm1, 4
  9268. vpslldq xmm0, xmm0, 12
  9269. vpxor xmm6, xmm6, xmm0
  9270. vpsrld xmm2, xmm6, 1
  9271. vpsrld xmm3, xmm6, 2
  9272. vpsrld xmm0, xmm6, 7
  9273. vpxor xmm2, xmm2, xmm3
  9274. vpxor xmm2, xmm2, xmm0
  9275. vpxor xmm2, xmm2, xmm1
  9276. vpxor xmm2, xmm2, xmm6
  9277. vpxor xmm4, xmm4, xmm2
  9278. L_AES_GCM_init_avx1_calc_iv_done:
  9279. ; Finish the hash with the IV bit length; T = Encrypt counter follows below
  ; edx = IV length in bytes on every path reaching this label (0 for an
  ; empty IV); shl by 3 converts it to bits. Writing edx zero-extends into
  ; rdx, so vmovq puts exactly that 32-bit value in xmm0's low qword.
  9280. vpxor xmm0, xmm0, xmm0
  9281. shl edx, 3
  9282. vmovq xmm0, rdx
  9283. vpxor xmm4, xmm4, xmm0
  9284. ; ghash_gfmul_avx
  ; Final xmm4 = xmm4 * xmm5 (H), same sequence as above.
  9285. vpshufd xmm1, xmm4, 78
  9286. vpshufd xmm2, xmm5, 78
  9287. vpclmulqdq xmm3, xmm5, xmm4, 17
  9288. vpclmulqdq xmm0, xmm5, xmm4, 0
  9289. vpxor xmm1, xmm1, xmm4
  9290. vpxor xmm2, xmm2, xmm5
  9291. vpclmulqdq xmm1, xmm1, xmm2, 0
  9292. vpxor xmm1, xmm1, xmm0
  9293. vpxor xmm1, xmm1, xmm3
  9294. vmovdqa xmm6, xmm0
  9295. vmovdqa xmm4, xmm3
  9296. vpslldq xmm2, xmm1, 8
  9297. vpsrldq xmm1, xmm1, 8
  9298. vpxor xmm6, xmm6, xmm2
  9299. vpxor xmm4, xmm4, xmm1
  9300. vpsrld xmm0, xmm6, 31
  9301. vpsrld xmm1, xmm4, 31
  9302. vpslld xmm6, xmm6, 1
  9303. vpslld xmm4, xmm4, 1
  9304. vpsrldq xmm2, xmm0, 12
  9305. vpslldq xmm0, xmm0, 4
  9306. vpslldq xmm1, xmm1, 4
  9307. vpor xmm4, xmm4, xmm2
  9308. vpor xmm6, xmm6, xmm0
  9309. vpor xmm4, xmm4, xmm1
  9310. vpslld xmm0, xmm6, 31
  9311. vpslld xmm1, xmm6, 30
  9312. vpslld xmm2, xmm6, 25
  9313. vpxor xmm0, xmm0, xmm1
  9314. vpxor xmm0, xmm0, xmm2
  9315. vmovdqa xmm1, xmm0
  9316. vpsrldq xmm1, xmm1, 4
  9317. vpslldq xmm0, xmm0, 12
  9318. vpxor xmm6, xmm6, xmm0
  9319. vpsrld xmm2, xmm6, 1
  9320. vpsrld xmm3, xmm6, 2
  9321. vpsrld xmm0, xmm6, 7
  9322. vpxor xmm2, xmm2, xmm3
  9323. vpxor xmm2, xmm2, xmm0
  9324. vpxor xmm2, xmm2, xmm1
  9325. vpxor xmm2, xmm2, xmm6
  9326. vpxor xmm4, xmm4, xmm2
  ; Shuffle the hash result into the byte order the cipher consumes; xmm4 is
  ; now the initial counter block for this IV.
  9327. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9328. ; Encrypt counter
  ; xmm7 = counter xor round key 0, then the remaining rounds; xmm8 carries the
  ; preloaded key at each round-count dispatch, as above.
  9329. vmovdqa xmm7, OWORD PTR [rdi]
  9330. vpxor xmm7, xmm7, xmm4
  9331. vaesenc xmm7, xmm7, [rdi+16]
  9332. vaesenc xmm7, xmm7, [rdi+32]
  9333. vaesenc xmm7, xmm7, [rdi+48]
  9334. vaesenc xmm7, xmm7, [rdi+64]
  9335. vaesenc xmm7, xmm7, [rdi+80]
  9336. vaesenc xmm7, xmm7, [rdi+96]
  9337. vaesenc xmm7, xmm7, [rdi+112]
  9338. vaesenc xmm7, xmm7, [rdi+128]
  9339. vaesenc xmm7, xmm7, [rdi+144]
  9340. cmp esi, 11
  9341. vmovdqa xmm8, OWORD PTR [rdi+160]
  9342. jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last
  9343. vaesenc xmm7, xmm7, xmm8
  9344. vaesenc xmm7, xmm7, [rdi+176]
  9345. cmp esi, 13
  9346. vmovdqa xmm8, OWORD PTR [rdi+192]
  9347. jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last
  9348. vaesenc xmm7, xmm7, xmm8
  9349. vaesenc xmm7, xmm7, [rdi+208]
  9350. vmovdqa xmm8, OWORD PTR [rdi+224]
  9351. L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last:
  9352. vaesenclast xmm7, xmm7, xmm8
  9353. vmovdqu xmm15, xmm7
  9354. L_AES_GCM_init_avx1_iv_done:
  ; Both IV paths meet here with xmm15 = T, xmm5 = H, xmm4 = counter block.
  ; All three stores use vmovdqa, so the destinations must be 16-byte aligned.
  9355. vmovdqa OWORD PTR [r9], xmm15
  ; Shuffle the counter with bswap_epi64 and add the "one" constant before
  ; storing it (presumably the counter value for the first data block --
  ; confirm against the encrypt/decrypt routines).
  9356. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  9357. vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
  9358. vmovdqa OWORD PTR [rax], xmm5
  9359. vmovdqa OWORD PTR [r8], xmm4
  ; Epilogue: restore the spilled xmm registers, release the frame and pop the
  ; integer registers in reverse push order.
  9360. vzeroupper
  9361. vmovdqu xmm6, OWORD PTR [rsp+16]
  9362. vmovdqu xmm7, OWORD PTR [rsp+32]
  9363. vmovdqu xmm8, OWORD PTR [rsp+48]
  9364. vmovdqu xmm15, OWORD PTR [rsp+64]
  9365. add rsp, 80
  9366. pop r13
  9367. pop r12
  9368. pop rsi
  9369. pop rdi
  9370. ret
  9371. AES_GCM_init_avx1 ENDP
  9372. _text ENDS
  ;-----------------------------------------------------------------------
  ; AES_GCM_aad_update_avx1
  ;
  ; Folds a run of 16-byte blocks into a 128-bit running hash value:
  ;     for each block:  state = gfmul(state XOR shuffle(block), mult)
  ;
  ; Register use is consistent with the Microsoft x64 convention
  ; (arguments in rcx/edx/r8/r9; xmm6 and xmm7 are saved and restored).
  ;
  ; In:    rcx = input bytes, read 16 at a time with unaligned loads
  ;        edx = input length in bytes, treated as unsigned 32-bit
  ;        r8  = 16-byte state, updated in place (aligned load/store,
  ;              so the address must be 16-byte aligned)
  ;        r9  = 16-byte multiplier (aligned load); presumably the hash
  ;              key H - confirm against the caller
  ; Out:   [r8] = updated state
  ; Clobb: rax, rcx, rdx, xmm0-xmm5, flags
  ; Stack: 32 bytes for the xmm6/xmm7 spill
  ;
  ; The loop is bottom-tested: one block is always processed, even when
  ; edx is 0, and a trailing partial block is read as a full 16 bytes.
  ; Presumably callers pass a non-zero multiple of 16 - TODO confirm.
  ;
  ; The byte offset is kept in 64 bits and compared UNSIGNED against the
  ; zero-extended length.  A signed 32-bit compare would see any length
  ; of 2 GiB or more as negative and stop after the first block.
  ;
  ; NOTE(review): rsp is adjusted but the PROC carries no FRAME/unwind
  ; directives.
  ;-----------------------------------------------------------------------
  _text SEGMENT READONLY PARA
  AES_GCM_aad_update_avx1 PROC
          mov     rax, rcx                        ; rax = input base; rcx becomes the offset
          mov     edx, edx                        ; zero-extend length into rdx
          sub     rsp, 32
          vmovdqu OWORD PTR [rsp], xmm6           ; spill callee-saved xmm6/xmm7
          vmovdqu OWORD PTR [rsp+16], xmm7
          vmovdqa xmm5, OWORD PTR [r8]            ; xmm5 = running state
          vmovdqa xmm6, OWORD PTR [r9]            ; xmm6 = multiplier
          xor     ecx, ecx                        ; offset = 0
  L_AES_GCM_aad_update_avx1_16_loop:
          ; state ^= shuffle(next 16 input bytes)
          vmovdqu xmm7, OWORD PTR [rax+rcx]
          vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vpxor   xmm5, xmm5, xmm7

          ; ghash_gfmul_avx
          ; 128x128 carry-less multiply using three 64x64 products
          vpshufd xmm1, xmm5, 78                  ; 78 swaps the 64-bit halves
          vpshufd xmm2, xmm6, 78
          vpclmulqdq xmm3, xmm6, xmm5, 17         ; high qword * high qword
          vpclmulqdq xmm0, xmm6, xmm5, 0          ; low qword  * low qword
          vpxor   xmm1, xmm1, xmm5                ; state.hi ^ state.lo
          vpxor   xmm2, xmm2, xmm6                ; mult.hi  ^ mult.lo
          vpclmulqdq xmm1, xmm1, xmm2, 0          ; product of the two sums
          vpxor   xmm1, xmm1, xmm0
          vpxor   xmm1, xmm1, xmm3                ; xmm1 = middle term
          vmovdqa xmm4, xmm0
          vmovdqa xmm5, xmm3
          vpslldq xmm2, xmm1, 8
          vpsrldq xmm1, xmm1, 8
          vpxor   xmm4, xmm4, xmm2                ; xmm4 = low 128 bits of product
          vpxor   xmm5, xmm5, xmm1                ; xmm5 = high 128 bits of product

          ; shift the 256-bit product xmm5:xmm4 left by one bit
          vpsrld  xmm0, xmm4, 31                  ; bit 31 of every dword = carry out
          vpsrld  xmm1, xmm5, 31
          vpslld  xmm4, xmm4, 1
          vpslld  xmm5, xmm5, 1
          vpsrldq xmm2, xmm0, 12                  ; carry out of low half -> high half
          vpslldq xmm0, xmm0, 4                   ; carries move up one dword
          vpslldq xmm1, xmm1, 4
          vpor    xmm5, xmm5, xmm2
          vpor    xmm4, xmm4, xmm0
          vpor    xmm5, xmm5, xmm1

          ; fold the low half into the high half (shift/XOR reduction;
          ; presumably modulo the GHASH polynomial)
          vpslld  xmm0, xmm4, 31
          vpslld  xmm1, xmm4, 30
          vpslld  xmm2, xmm4, 25
          vpxor   xmm0, xmm0, xmm1
          vpxor   xmm0, xmm0, xmm2
          vmovdqa xmm1, xmm0
          vpsrldq xmm1, xmm1, 4
          vpslldq xmm0, xmm0, 12
          vpxor   xmm4, xmm4, xmm0
          vpsrld  xmm2, xmm4, 1
          vpsrld  xmm3, xmm4, 2
          vpsrld  xmm0, xmm4, 7
          vpxor   xmm2, xmm2, xmm3
          vpxor   xmm2, xmm2, xmm0
          vpxor   xmm2, xmm2, xmm1
          vpxor   xmm2, xmm2, xmm4
          vpxor   xmm5, xmm5, xmm2                ; xmm5 = new running state

          add     rcx, 16                         ; 64-bit offset cannot wrap here
          cmp     rcx, rdx
          jb      L_AES_GCM_aad_update_avx1_16_loop   ; unsigned: offset < length

          vmovdqa OWORD PTR [r8], xmm5            ; write the state back
          vzeroupper
          vmovdqu xmm6, OWORD PTR [rsp]           ; restore callee-saved registers
          vmovdqu xmm7, OWORD PTR [rsp+16]
          add     rsp, 32
          ret
  AES_GCM_aad_update_avx1 ENDP
  _text ENDS
  ;-----------------------------------------------------------------------
  ; AES_GCM_encrypt_block_avx1
  ;
  ; Encrypts one 16-byte block in counter style: the stored counter block
  ; is shuffled, run through the AES rounds, and XORed with 16 input
  ; bytes.  The stored counter is advanced before returning.
  ;
  ; Register use is consistent with the Microsoft x64 convention.
  ;
  ; In:    rcx      = round-key schedule; keys at +160 and above are
  ;                   fetched with aligned loads
  ;        edx      = round count, compared signed against 11 and 13
  ;        r8       = output, 16 bytes, unaligned store
  ;        r9       = input, 16 bytes, unaligned load
  ;        [rsp+40] = pointer to the 16-byte counter block (first stack
  ;                   argument, above the return address and 32 bytes of
  ;                   shadow space)
  ; Out:   [r8]      = keystream XOR input
  ;        [counter] = old value + L_avx1_aes_gcm_one (dword-wise add)
  ; Clobb: rax, r10, r11, xmm0, xmm1, flags
  ;-----------------------------------------------------------------------
  _text SEGMENT READONLY PARA
  AES_GCM_encrypt_block_avx1 PROC
          mov     rax, QWORD PTR [rsp+40]         ; rax = counter block pointer
          mov     r10, r8                         ; r10 = output
          mov     r11, r9                         ; r11 = input

          ; The block that gets encrypted is the counter as it was on
          ; entry; the incremented copy is only written back to memory.
          vmovdqu xmm1, OWORD PTR [rax]
          vpshufb xmm0, xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
          vpaddd  xmm1, xmm1, OWORD PTR L_avx1_aes_gcm_one
          vmovdqu OWORD PTR [rax], xmm1

          ; Initial key XOR followed by the nine rounds every key size shares
          vpxor   xmm0, xmm0, [rcx]
          vaesenc xmm0, xmm0, [rcx+16]
          vaesenc xmm0, xmm0, [rcx+32]
          vaesenc xmm0, xmm0, [rcx+48]
          vaesenc xmm0, xmm0, [rcx+64]
          vaesenc xmm0, xmm0, [rcx+80]
          vaesenc xmm0, xmm0, [rcx+96]
          vaesenc xmm0, xmm0, [rcx+112]
          vaesenc xmm0, xmm0, [rcx+128]
          vaesenc xmm0, xmm0, [rcx+144]

          ; xmm1 always holds the candidate final-round key at the branch
          vmovdqa xmm1, OWORD PTR [rcx+160]
          cmp     edx, 11
          jl      L_AES_GCM_encrypt_block_avx1_aesenc_block_last  ; fewer than 11 rounds
          vaesenc xmm0, xmm0, xmm1
          vaesenc xmm0, xmm0, [rcx+176]

          vmovdqa xmm1, OWORD PTR [rcx+192]
          cmp     edx, 13
          jl      L_AES_GCM_encrypt_block_avx1_aesenc_block_last  ; fewer than 13 rounds
          vaesenc xmm0, xmm0, xmm1
          vaesenc xmm0, xmm0, [rcx+208]

          vmovdqa xmm1, OWORD PTR [rcx+224]
  L_AES_GCM_encrypt_block_avx1_aesenc_block_last:
          vaesenclast xmm0, xmm0, xmm1

          ; output = keystream XOR input
          vmovdqu xmm1, OWORD PTR [r11]
          vpxor   xmm0, xmm0, xmm1
          vmovdqu OWORD PTR [r10], xmm0

          ; xmm0 is left holding the shuffled output block; nothing in
          ; this procedure reads it afterwards.
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vzeroupper
          ret
  AES_GCM_encrypt_block_avx1 ENDP
  _text ENDS
  ;-----------------------------------------------------------------------
  ; AES_GCM_ghash_block_avx1
  ;
  ; Folds a single 16-byte block into a 128-bit running hash value:
  ;     state = gfmul(state XOR shuffle(block), mult)
  ;
  ; Register use is consistent with the Microsoft x64 convention
  ; (xmm6 and xmm7 are saved and restored).
  ;
  ; In:    rcx = 16-byte data block, unaligned load
  ;        rdx = 16-byte state, updated in place (aligned load/store, so
  ;              the address must be 16-byte aligned)
  ;        r8  = 16-byte multiplier (aligned load); presumably the hash
  ;              key H - confirm against the caller
  ; Out:   [rdx] = updated state
  ; Clobb: xmm0-xmm5, flags
  ; Stack: 32 bytes for the xmm6/xmm7 spill
  ;-----------------------------------------------------------------------
  _text SEGMENT READONLY PARA
  AES_GCM_ghash_block_avx1 PROC
          sub     rsp, 32
          vmovdqu OWORD PTR [rsp], xmm6           ; spill callee-saved xmm6/xmm7
          vmovdqu OWORD PTR [rsp+16], xmm7

          vmovdqa xmm4, OWORD PTR [rdx]           ; xmm4 = running state
          vmovdqa xmm5, OWORD PTR [r8]            ; xmm5 = multiplier
          vmovdqu xmm7, OWORD PTR [rcx]           ; xmm7 = data block
          vpshufb xmm7, xmm7, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vpxor   xmm4, xmm4, xmm7                ; state ^= shuffled block

          ; ghash_gfmul_avx
          ; 128x128 carry-less multiply using three 64x64 products
          vpclmulqdq xmm3, xmm5, xmm4, 17         ; high qword * high qword
          vpclmulqdq xmm0, xmm5, xmm4, 0          ; low qword  * low qword
          vpshufd xmm1, xmm4, 78                  ; 78 swaps the 64-bit halves
          vpxor   xmm1, xmm1, xmm4                ; state.hi ^ state.lo
          vpshufd xmm2, xmm5, 78
          vpxor   xmm2, xmm2, xmm5                ; mult.hi  ^ mult.lo
          vpclmulqdq xmm1, xmm1, xmm2, 0          ; product of the two sums
          vpxor   xmm1, xmm1, xmm0
          vpxor   xmm1, xmm1, xmm3                ; xmm1 = middle term
          vpslldq xmm2, xmm1, 8
          vpsrldq xmm1, xmm1, 8
          vpxor   xmm6, xmm0, xmm2                ; xmm6 = low 128 bits of product
          vpxor   xmm4, xmm3, xmm1                ; xmm4 = high 128 bits of product

          ; shift the 256-bit product xmm4:xmm6 left by one bit
          ; low half
          vpsrld  xmm0, xmm6, 31                  ; bit 31 of every dword = carry out
          vpslld  xmm6, xmm6, 1
          vpsrldq xmm2, xmm0, 12                  ; carry out of low half -> high half
          vpslldq xmm0, xmm0, 4                   ; carries move up one dword
          vpor    xmm6, xmm6, xmm0
          ; high half
          vpsrld  xmm1, xmm4, 31
          vpslld  xmm4, xmm4, 1
          vpslldq xmm1, xmm1, 4
          vpor    xmm4, xmm4, xmm2
          vpor    xmm4, xmm4, xmm1

          ; fold the low half into the high half (shift/XOR reduction;
          ; presumably modulo the GHASH polynomial)
          vpslld  xmm0, xmm6, 31
          vpslld  xmm1, xmm6, 30
          vpslld  xmm2, xmm6, 25
          vpxor   xmm0, xmm0, xmm1
          vpxor   xmm0, xmm0, xmm2
          vpsrldq xmm1, xmm0, 4
          vpslldq xmm0, xmm0, 12
          vpxor   xmm6, xmm6, xmm0
          vpsrld  xmm2, xmm6, 1
          vpsrld  xmm3, xmm6, 2
          vpsrld  xmm0, xmm6, 7
          vpxor   xmm2, xmm2, xmm3
          vpxor   xmm2, xmm2, xmm0
          vpxor   xmm2, xmm2, xmm1
          vpxor   xmm2, xmm2, xmm6
          vpxor   xmm4, xmm4, xmm2                ; xmm4 = new running state

          vmovdqa OWORD PTR [rdx], xmm4           ; write the state back
          vzeroupper
          vmovdqu xmm6, OWORD PTR [rsp]           ; restore callee-saved registers
          vmovdqu xmm7, OWORD PTR [rsp+16]
          add     rsp, 32
          ret
  AES_GCM_ghash_block_avx1 ENDP
  _text ENDS
  9541. _text SEGMENT READONLY PARA
  9542. AES_GCM_encrypt_update_avx1 PROC
  9543. push r13
  9544. push r12
  9545. push r14
  9546. push r15
  9547. push rdi
  9548. mov rax, rcx
  9549. mov r10, r8
  9550. mov r8d, edx
  9551. mov r11, r9
  9552. mov r9d, DWORD PTR [rsp+80]
  9553. mov r12, QWORD PTR [rsp+88]
  9554. mov r14, QWORD PTR [rsp+96]
  9555. mov r15, QWORD PTR [rsp+104]
  9556. sub rsp, 320
  9557. vmovdqu OWORD PTR [rsp+160], xmm6
  9558. vmovdqu OWORD PTR [rsp+176], xmm7
  9559. vmovdqu OWORD PTR [rsp+192], xmm8
  9560. vmovdqu OWORD PTR [rsp+208], xmm9
  9561. vmovdqu OWORD PTR [rsp+224], xmm10
  9562. vmovdqu OWORD PTR [rsp+240], xmm11
  9563. vmovdqu OWORD PTR [rsp+256], xmm12
  9564. vmovdqu OWORD PTR [rsp+272], xmm13
  9565. vmovdqu OWORD PTR [rsp+288], xmm14
  9566. vmovdqu OWORD PTR [rsp+304], xmm15
  9567. vmovdqa xmm6, OWORD PTR [r12]
  9568. vmovdqa xmm5, OWORD PTR [r14]
  9569. vpsrlq xmm9, xmm5, 63
  9570. vpsllq xmm8, xmm5, 1
  9571. vpslldq xmm9, xmm9, 8
  9572. vpor xmm8, xmm8, xmm9
  9573. vpshufd xmm5, xmm5, 255
  9574. vpsrad xmm5, xmm5, 31
  9575. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  9576. vpxor xmm5, xmm5, xmm8
  9577. xor edi, edi
  9578. cmp r9d, 128
  9579. mov r13d, r9d
  9580. jl L_AES_GCM_encrypt_update_avx1_done_128
  9581. and r13d, 4294967168
  9582. vmovdqa xmm2, xmm6
  9583. ; H ^ 1
  9584. vmovdqu OWORD PTR [rsp], xmm5
  9585. ; H ^ 2
  9586. vpclmulqdq xmm8, xmm5, xmm5, 0
  9587. vpclmulqdq xmm0, xmm5, xmm5, 17
  9588. vpslld xmm12, xmm8, 31
  9589. vpslld xmm13, xmm8, 30
  9590. vpslld xmm14, xmm8, 25
  9591. vpxor xmm12, xmm12, xmm13
  9592. vpxor xmm12, xmm12, xmm14
  9593. vpsrldq xmm13, xmm12, 4
  9594. vpslldq xmm12, xmm12, 12
  9595. vpxor xmm8, xmm8, xmm12
  9596. vpsrld xmm14, xmm8, 1
  9597. vpsrld xmm10, xmm8, 2
  9598. vpsrld xmm9, xmm8, 7
  9599. vpxor xmm14, xmm14, xmm10
  9600. vpxor xmm14, xmm14, xmm9
  9601. vpxor xmm14, xmm14, xmm13
  9602. vpxor xmm14, xmm14, xmm8
  9603. vpxor xmm0, xmm0, xmm14
  9604. vmovdqu OWORD PTR [rsp+16], xmm0
  9605. ; H ^ 3
  9606. ; ghash_gfmul_red_avx
  9607. vpshufd xmm9, xmm5, 78
  9608. vpshufd xmm10, xmm0, 78
  9609. vpclmulqdq xmm11, xmm0, xmm5, 17
  9610. vpclmulqdq xmm8, xmm0, xmm5, 0
  9611. vpxor xmm9, xmm9, xmm5
  9612. vpxor xmm10, xmm10, xmm0
  9613. vpclmulqdq xmm9, xmm9, xmm10, 0
  9614. vpxor xmm9, xmm9, xmm8
  9615. vpxor xmm9, xmm9, xmm11
  9616. vpslldq xmm10, xmm9, 8
  9617. vpsrldq xmm9, xmm9, 8
  9618. vpxor xmm8, xmm8, xmm10
  9619. vpxor xmm1, xmm11, xmm9
  9620. vpslld xmm12, xmm8, 31
  9621. vpslld xmm13, xmm8, 30
  9622. vpslld xmm14, xmm8, 25
  9623. vpxor xmm12, xmm12, xmm13
  9624. vpxor xmm12, xmm12, xmm14
  9625. vpsrldq xmm13, xmm12, 4
  9626. vpslldq xmm12, xmm12, 12
  9627. vpxor xmm8, xmm8, xmm12
  9628. vpsrld xmm14, xmm8, 1
  9629. vpsrld xmm10, xmm8, 2
  9630. vpsrld xmm9, xmm8, 7
  9631. vpxor xmm14, xmm14, xmm10
  9632. vpxor xmm14, xmm14, xmm9
  9633. vpxor xmm14, xmm14, xmm13
  9634. vpxor xmm14, xmm14, xmm8
  9635. vpxor xmm1, xmm1, xmm14
  9636. vmovdqu OWORD PTR [rsp+32], xmm1
  9637. ; H ^ 4
  9638. vpclmulqdq xmm8, xmm0, xmm0, 0
  9639. vpclmulqdq xmm3, xmm0, xmm0, 17
  9640. vpslld xmm12, xmm8, 31
  9641. vpslld xmm13, xmm8, 30
  9642. vpslld xmm14, xmm8, 25
  9643. vpxor xmm12, xmm12, xmm13
  9644. vpxor xmm12, xmm12, xmm14
  9645. vpsrldq xmm13, xmm12, 4
  9646. vpslldq xmm12, xmm12, 12
  9647. vpxor xmm8, xmm8, xmm12
  9648. vpsrld xmm14, xmm8, 1
  9649. vpsrld xmm10, xmm8, 2
  9650. vpsrld xmm9, xmm8, 7
  9651. vpxor xmm14, xmm14, xmm10
  9652. vpxor xmm14, xmm14, xmm9
  9653. vpxor xmm14, xmm14, xmm13
  9654. vpxor xmm14, xmm14, xmm8
  9655. vpxor xmm3, xmm3, xmm14
  9656. vmovdqu OWORD PTR [rsp+48], xmm3
  9657. ; H ^ 5
  9658. ; ghash_gfmul_red_avx
  9659. vpshufd xmm9, xmm0, 78
  9660. vpshufd xmm10, xmm1, 78
  9661. vpclmulqdq xmm11, xmm1, xmm0, 17
  9662. vpclmulqdq xmm8, xmm1, xmm0, 0
  9663. vpxor xmm9, xmm9, xmm0
  9664. vpxor xmm10, xmm10, xmm1
  9665. vpclmulqdq xmm9, xmm9, xmm10, 0
  9666. vpxor xmm9, xmm9, xmm8
  9667. vpxor xmm9, xmm9, xmm11
  9668. vpslldq xmm10, xmm9, 8
  9669. vpsrldq xmm9, xmm9, 8
  9670. vpxor xmm8, xmm8, xmm10
  9671. vpxor xmm7, xmm11, xmm9
  9672. vpslld xmm12, xmm8, 31
  9673. vpslld xmm13, xmm8, 30
  9674. vpslld xmm14, xmm8, 25
  9675. vpxor xmm12, xmm12, xmm13
  9676. vpxor xmm12, xmm12, xmm14
  9677. vpsrldq xmm13, xmm12, 4
  9678. vpslldq xmm12, xmm12, 12
  9679. vpxor xmm8, xmm8, xmm12
  9680. vpsrld xmm14, xmm8, 1
  9681. vpsrld xmm10, xmm8, 2
  9682. vpsrld xmm9, xmm8, 7
  9683. vpxor xmm14, xmm14, xmm10
  9684. vpxor xmm14, xmm14, xmm9
  9685. vpxor xmm14, xmm14, xmm13
  9686. vpxor xmm14, xmm14, xmm8
  9687. vpxor xmm7, xmm7, xmm14
  9688. vmovdqu OWORD PTR [rsp+64], xmm7
  9689. ; H ^ 6
  9690. vpclmulqdq xmm8, xmm1, xmm1, 0
  9691. vpclmulqdq xmm7, xmm1, xmm1, 17
  9692. vpslld xmm12, xmm8, 31
  9693. vpslld xmm13, xmm8, 30
  9694. vpslld xmm14, xmm8, 25
  9695. vpxor xmm12, xmm12, xmm13
  9696. vpxor xmm12, xmm12, xmm14
  9697. vpsrldq xmm13, xmm12, 4
  9698. vpslldq xmm12, xmm12, 12
  9699. vpxor xmm8, xmm8, xmm12
  9700. vpsrld xmm14, xmm8, 1
  9701. vpsrld xmm10, xmm8, 2
  9702. vpsrld xmm9, xmm8, 7
  9703. vpxor xmm14, xmm14, xmm10
  9704. vpxor xmm14, xmm14, xmm9
  9705. vpxor xmm14, xmm14, xmm13
  9706. vpxor xmm14, xmm14, xmm8
  9707. vpxor xmm7, xmm7, xmm14
  9708. vmovdqu OWORD PTR [rsp+80], xmm7
  9709. ; H ^ 7
  9710. ; ghash_gfmul_red_avx
  9711. vpshufd xmm9, xmm1, 78
  9712. vpshufd xmm10, xmm3, 78
  9713. vpclmulqdq xmm11, xmm3, xmm1, 17
  9714. vpclmulqdq xmm8, xmm3, xmm1, 0
  9715. vpxor xmm9, xmm9, xmm1
  9716. vpxor xmm10, xmm10, xmm3
  9717. vpclmulqdq xmm9, xmm9, xmm10, 0
  9718. vpxor xmm9, xmm9, xmm8
  9719. vpxor xmm9, xmm9, xmm11
  9720. vpslldq xmm10, xmm9, 8
  9721. vpsrldq xmm9, xmm9, 8
  9722. vpxor xmm8, xmm8, xmm10
  9723. vpxor xmm7, xmm11, xmm9
  9724. vpslld xmm12, xmm8, 31
  9725. vpslld xmm13, xmm8, 30
  9726. vpslld xmm14, xmm8, 25
  9727. vpxor xmm12, xmm12, xmm13
  9728. vpxor xmm12, xmm12, xmm14
  9729. vpsrldq xmm13, xmm12, 4
  9730. vpslldq xmm12, xmm12, 12
  9731. vpxor xmm8, xmm8, xmm12
  9732. vpsrld xmm14, xmm8, 1
  9733. vpsrld xmm10, xmm8, 2
  9734. vpsrld xmm9, xmm8, 7
  9735. vpxor xmm14, xmm14, xmm10
  9736. vpxor xmm14, xmm14, xmm9
  9737. vpxor xmm14, xmm14, xmm13
  9738. vpxor xmm14, xmm14, xmm8
  9739. vpxor xmm7, xmm7, xmm14
  9740. vmovdqu OWORD PTR [rsp+96], xmm7
  9741. ; H ^ 8
  9742. vpclmulqdq xmm8, xmm3, xmm3, 0
  9743. vpclmulqdq xmm7, xmm3, xmm3, 17
  9744. vpslld xmm12, xmm8, 31
  9745. vpslld xmm13, xmm8, 30
  9746. vpslld xmm14, xmm8, 25
  9747. vpxor xmm12, xmm12, xmm13
  9748. vpxor xmm12, xmm12, xmm14
  9749. vpsrldq xmm13, xmm12, 4
  9750. vpslldq xmm12, xmm12, 12
  9751. vpxor xmm8, xmm8, xmm12
  9752. vpsrld xmm14, xmm8, 1
  9753. vpsrld xmm10, xmm8, 2
  9754. vpsrld xmm9, xmm8, 7
  9755. vpxor xmm14, xmm14, xmm10
  9756. vpxor xmm14, xmm14, xmm9
  9757. vpxor xmm14, xmm14, xmm13
  9758. vpxor xmm14, xmm14, xmm8
  9759. vpxor xmm7, xmm7, xmm14
  9760. vmovdqu OWORD PTR [rsp+112], xmm7
  9761. ; First 128 bytes of input
  9762. vmovdqu xmm0, OWORD PTR [r15]
  9763. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  9764. vpshufb xmm8, xmm0, xmm1
  9765. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  9766. vpshufb xmm9, xmm9, xmm1
  9767. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  9768. vpshufb xmm10, xmm10, xmm1
  9769. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  9770. vpshufb xmm11, xmm11, xmm1
  9771. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  9772. vpshufb xmm12, xmm12, xmm1
  9773. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  9774. vpshufb xmm13, xmm13, xmm1
  9775. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  9776. vpshufb xmm14, xmm14, xmm1
  9777. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  9778. vpshufb xmm15, xmm15, xmm1
  9779. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  9780. vmovdqa xmm7, OWORD PTR [rax]
  9781. vmovdqu OWORD PTR [r15], xmm0
  9782. vpxor xmm8, xmm8, xmm7
  9783. vpxor xmm9, xmm9, xmm7
  9784. vpxor xmm10, xmm10, xmm7
  9785. vpxor xmm11, xmm11, xmm7
  9786. vpxor xmm12, xmm12, xmm7
  9787. vpxor xmm13, xmm13, xmm7
  9788. vpxor xmm14, xmm14, xmm7
  9789. vpxor xmm15, xmm15, xmm7
  9790. vmovdqa xmm7, OWORD PTR [rax+16]
  9791. vaesenc xmm8, xmm8, xmm7
  9792. vaesenc xmm9, xmm9, xmm7
  9793. vaesenc xmm10, xmm10, xmm7
  9794. vaesenc xmm11, xmm11, xmm7
  9795. vaesenc xmm12, xmm12, xmm7
  9796. vaesenc xmm13, xmm13, xmm7
  9797. vaesenc xmm14, xmm14, xmm7
  9798. vaesenc xmm15, xmm15, xmm7
  9799. vmovdqa xmm7, OWORD PTR [rax+32]
  9800. vaesenc xmm8, xmm8, xmm7
  9801. vaesenc xmm9, xmm9, xmm7
  9802. vaesenc xmm10, xmm10, xmm7
  9803. vaesenc xmm11, xmm11, xmm7
  9804. vaesenc xmm12, xmm12, xmm7
  9805. vaesenc xmm13, xmm13, xmm7
  9806. vaesenc xmm14, xmm14, xmm7
  9807. vaesenc xmm15, xmm15, xmm7
  9808. vmovdqa xmm7, OWORD PTR [rax+48]
  9809. vaesenc xmm8, xmm8, xmm7
  9810. vaesenc xmm9, xmm9, xmm7
  9811. vaesenc xmm10, xmm10, xmm7
  9812. vaesenc xmm11, xmm11, xmm7
  9813. vaesenc xmm12, xmm12, xmm7
  9814. vaesenc xmm13, xmm13, xmm7
  9815. vaesenc xmm14, xmm14, xmm7
  9816. vaesenc xmm15, xmm15, xmm7
  9817. vmovdqa xmm7, OWORD PTR [rax+64]
  9818. vaesenc xmm8, xmm8, xmm7
  9819. vaesenc xmm9, xmm9, xmm7
  9820. vaesenc xmm10, xmm10, xmm7
  9821. vaesenc xmm11, xmm11, xmm7
  9822. vaesenc xmm12, xmm12, xmm7
  9823. vaesenc xmm13, xmm13, xmm7
  9824. vaesenc xmm14, xmm14, xmm7
  9825. vaesenc xmm15, xmm15, xmm7
  9826. vmovdqa xmm7, OWORD PTR [rax+80]
  9827. vaesenc xmm8, xmm8, xmm7
  9828. vaesenc xmm9, xmm9, xmm7
  9829. vaesenc xmm10, xmm10, xmm7
  9830. vaesenc xmm11, xmm11, xmm7
  9831. vaesenc xmm12, xmm12, xmm7
  9832. vaesenc xmm13, xmm13, xmm7
  9833. vaesenc xmm14, xmm14, xmm7
  9834. vaesenc xmm15, xmm15, xmm7
  9835. vmovdqa xmm7, OWORD PTR [rax+96]
  9836. vaesenc xmm8, xmm8, xmm7
  9837. vaesenc xmm9, xmm9, xmm7
  9838. vaesenc xmm10, xmm10, xmm7
  9839. vaesenc xmm11, xmm11, xmm7
  9840. vaesenc xmm12, xmm12, xmm7
  9841. vaesenc xmm13, xmm13, xmm7
  9842. vaesenc xmm14, xmm14, xmm7
  9843. vaesenc xmm15, xmm15, xmm7
  9844. vmovdqa xmm7, OWORD PTR [rax+112]
  9845. vaesenc xmm8, xmm8, xmm7
  9846. vaesenc xmm9, xmm9, xmm7
  9847. vaesenc xmm10, xmm10, xmm7
  9848. vaesenc xmm11, xmm11, xmm7
  9849. vaesenc xmm12, xmm12, xmm7
  9850. vaesenc xmm13, xmm13, xmm7
  9851. vaesenc xmm14, xmm14, xmm7
  9852. vaesenc xmm15, xmm15, xmm7
  9853. vmovdqa xmm7, OWORD PTR [rax+128]
  9854. vaesenc xmm8, xmm8, xmm7
  9855. vaesenc xmm9, xmm9, xmm7
  9856. vaesenc xmm10, xmm10, xmm7
  9857. vaesenc xmm11, xmm11, xmm7
  9858. vaesenc xmm12, xmm12, xmm7
  9859. vaesenc xmm13, xmm13, xmm7
  9860. vaesenc xmm14, xmm14, xmm7
  9861. vaesenc xmm15, xmm15, xmm7
  9862. vmovdqa xmm7, OWORD PTR [rax+144]
  9863. vaesenc xmm8, xmm8, xmm7
  9864. vaesenc xmm9, xmm9, xmm7
  9865. vaesenc xmm10, xmm10, xmm7
  9866. vaesenc xmm11, xmm11, xmm7
  9867. vaesenc xmm12, xmm12, xmm7
  9868. vaesenc xmm13, xmm13, xmm7
  9869. vaesenc xmm14, xmm14, xmm7
  9870. vaesenc xmm15, xmm15, xmm7
  9871. cmp r8d, 11
  9872. vmovdqa xmm7, OWORD PTR [rax+160]
  9873. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done
  9874. vaesenc xmm8, xmm8, xmm7
  9875. vaesenc xmm9, xmm9, xmm7
  9876. vaesenc xmm10, xmm10, xmm7
  9877. vaesenc xmm11, xmm11, xmm7
  9878. vaesenc xmm12, xmm12, xmm7
  9879. vaesenc xmm13, xmm13, xmm7
  9880. vaesenc xmm14, xmm14, xmm7
  9881. vaesenc xmm15, xmm15, xmm7
  9882. vmovdqa xmm7, OWORD PTR [rax+176]
  9883. vaesenc xmm8, xmm8, xmm7
  9884. vaesenc xmm9, xmm9, xmm7
  9885. vaesenc xmm10, xmm10, xmm7
  9886. vaesenc xmm11, xmm11, xmm7
  9887. vaesenc xmm12, xmm12, xmm7
  9888. vaesenc xmm13, xmm13, xmm7
  9889. vaesenc xmm14, xmm14, xmm7
  9890. vaesenc xmm15, xmm15, xmm7
  9891. cmp r8d, 13
  9892. vmovdqa xmm7, OWORD PTR [rax+192]
  9893. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done
  9894. vaesenc xmm8, xmm8, xmm7
  9895. vaesenc xmm9, xmm9, xmm7
  9896. vaesenc xmm10, xmm10, xmm7
  9897. vaesenc xmm11, xmm11, xmm7
  9898. vaesenc xmm12, xmm12, xmm7
  9899. vaesenc xmm13, xmm13, xmm7
  9900. vaesenc xmm14, xmm14, xmm7
  9901. vaesenc xmm15, xmm15, xmm7
  9902. vmovdqa xmm7, OWORD PTR [rax+208]
  9903. vaesenc xmm8, xmm8, xmm7
  9904. vaesenc xmm9, xmm9, xmm7
  9905. vaesenc xmm10, xmm10, xmm7
  9906. vaesenc xmm11, xmm11, xmm7
  9907. vaesenc xmm12, xmm12, xmm7
  9908. vaesenc xmm13, xmm13, xmm7
  9909. vaesenc xmm14, xmm14, xmm7
  9910. vaesenc xmm15, xmm15, xmm7
  9911. vmovdqa xmm7, OWORD PTR [rax+224]
  9912. L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done:
  9913. vaesenclast xmm8, xmm8, xmm7
  9914. vaesenclast xmm9, xmm9, xmm7
  9915. vmovdqu xmm0, OWORD PTR [r11]
  9916. vmovdqu xmm1, OWORD PTR [r11+16]
  9917. vpxor xmm8, xmm8, xmm0
  9918. vpxor xmm9, xmm9, xmm1
  9919. vmovdqu OWORD PTR [r10], xmm8
  9920. vmovdqu OWORD PTR [r10+16], xmm9
  9921. vaesenclast xmm10, xmm10, xmm7
  9922. vaesenclast xmm11, xmm11, xmm7
  9923. vmovdqu xmm0, OWORD PTR [r11+32]
  9924. vmovdqu xmm1, OWORD PTR [r11+48]
  9925. vpxor xmm10, xmm10, xmm0
  9926. vpxor xmm11, xmm11, xmm1
  9927. vmovdqu OWORD PTR [r10+32], xmm10
  9928. vmovdqu OWORD PTR [r10+48], xmm11
  9929. vaesenclast xmm12, xmm12, xmm7
  9930. vaesenclast xmm13, xmm13, xmm7
  9931. vmovdqu xmm0, OWORD PTR [r11+64]
  9932. vmovdqu xmm1, OWORD PTR [r11+80]
  9933. vpxor xmm12, xmm12, xmm0
  9934. vpxor xmm13, xmm13, xmm1
  9935. vmovdqu OWORD PTR [r10+64], xmm12
  9936. vmovdqu OWORD PTR [r10+80], xmm13
  9937. vaesenclast xmm14, xmm14, xmm7
  9938. vaesenclast xmm15, xmm15, xmm7
  9939. vmovdqu xmm0, OWORD PTR [r11+96]
  9940. vmovdqu xmm1, OWORD PTR [r11+112]
  9941. vpxor xmm14, xmm14, xmm0
  9942. vpxor xmm15, xmm15, xmm1
  9943. vmovdqu OWORD PTR [r10+96], xmm14
  9944. vmovdqu OWORD PTR [r10+112], xmm15
  9945. cmp r13d, 128
  9946. mov edi, 128
  9947. jle L_AES_GCM_encrypt_update_avx1_end_128
  9948. ; More 128 bytes of input
  9949. L_AES_GCM_encrypt_update_avx1_ghash_128:
  9950. lea rcx, QWORD PTR [r11+rdi]
  9951. lea rdx, QWORD PTR [r10+rdi]
  9952. vmovdqu xmm0, OWORD PTR [r15]
  9953. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  9954. vpshufb xmm8, xmm0, xmm1
  9955. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  9956. vpshufb xmm9, xmm9, xmm1
  9957. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  9958. vpshufb xmm10, xmm10, xmm1
  9959. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  9960. vpshufb xmm11, xmm11, xmm1
  9961. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  9962. vpshufb xmm12, xmm12, xmm1
  9963. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  9964. vpshufb xmm13, xmm13, xmm1
  9965. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  9966. vpshufb xmm14, xmm14, xmm1
  9967. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  9968. vpshufb xmm15, xmm15, xmm1
  9969. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  9970. vmovdqa xmm7, OWORD PTR [rax]
  9971. vmovdqu OWORD PTR [r15], xmm0
  9972. vpxor xmm8, xmm8, xmm7
  9973. vpxor xmm9, xmm9, xmm7
  9974. vpxor xmm10, xmm10, xmm7
  9975. vpxor xmm11, xmm11, xmm7
  9976. vpxor xmm12, xmm12, xmm7
  9977. vpxor xmm13, xmm13, xmm7
  9978. vpxor xmm14, xmm14, xmm7
  9979. vpxor xmm15, xmm15, xmm7
  9980. vmovdqu xmm7, OWORD PTR [rsp+112]
  9981. vmovdqu xmm0, OWORD PTR [rdx+-128]
  9982. vaesenc xmm8, xmm8, [rax+16]
  9983. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9984. vpxor xmm0, xmm0, xmm2
  9985. vpshufd xmm1, xmm7, 78
  9986. vpshufd xmm5, xmm0, 78
  9987. vpxor xmm1, xmm1, xmm7
  9988. vpxor xmm5, xmm5, xmm0
  9989. vpclmulqdq xmm3, xmm0, xmm7, 17
  9990. vaesenc xmm9, xmm9, [rax+16]
  9991. vaesenc xmm10, xmm10, [rax+16]
  9992. vpclmulqdq xmm2, xmm0, xmm7, 0
  9993. vaesenc xmm11, xmm11, [rax+16]
  9994. vaesenc xmm12, xmm12, [rax+16]
  9995. vpclmulqdq xmm1, xmm1, xmm5, 0
  9996. vaesenc xmm13, xmm13, [rax+16]
  9997. vaesenc xmm14, xmm14, [rax+16]
  9998. vaesenc xmm15, xmm15, [rax+16]
  9999. vpxor xmm1, xmm1, xmm2
  10000. vpxor xmm1, xmm1, xmm3
  10001. vmovdqu xmm7, OWORD PTR [rsp+96]
  10002. vmovdqu xmm0, OWORD PTR [rdx+-112]
  10003. vpshufd xmm4, xmm7, 78
  10004. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10005. vaesenc xmm8, xmm8, [rax+32]
  10006. vpxor xmm4, xmm4, xmm7
  10007. vpshufd xmm5, xmm0, 78
  10008. vpxor xmm5, xmm5, xmm0
  10009. vpclmulqdq xmm6, xmm0, xmm7, 17
  10010. vaesenc xmm9, xmm9, [rax+32]
  10011. vaesenc xmm10, xmm10, [rax+32]
  10012. vpclmulqdq xmm7, xmm0, xmm7, 0
  10013. vaesenc xmm11, xmm11, [rax+32]
  10014. vaesenc xmm12, xmm12, [rax+32]
  10015. vpclmulqdq xmm4, xmm4, xmm5, 0
  10016. vaesenc xmm13, xmm13, [rax+32]
  10017. vaesenc xmm14, xmm14, [rax+32]
  10018. vaesenc xmm15, xmm15, [rax+32]
  10019. vpxor xmm1, xmm1, xmm7
  10020. vpxor xmm2, xmm2, xmm7
  10021. vpxor xmm1, xmm1, xmm6
  10022. vpxor xmm3, xmm3, xmm6
  10023. vpxor xmm1, xmm1, xmm4
  10024. vmovdqu xmm7, OWORD PTR [rsp+80]
  10025. vmovdqu xmm0, OWORD PTR [rdx+-96]
  10026. vpshufd xmm4, xmm7, 78
  10027. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10028. vaesenc xmm8, xmm8, [rax+48]
  10029. vpxor xmm4, xmm4, xmm7
  10030. vpshufd xmm5, xmm0, 78
  10031. vpxor xmm5, xmm5, xmm0
  10032. vpclmulqdq xmm6, xmm0, xmm7, 17
  10033. vaesenc xmm9, xmm9, [rax+48]
  10034. vaesenc xmm10, xmm10, [rax+48]
  10035. vpclmulqdq xmm7, xmm0, xmm7, 0
  10036. vaesenc xmm11, xmm11, [rax+48]
  10037. vaesenc xmm12, xmm12, [rax+48]
  10038. vpclmulqdq xmm4, xmm4, xmm5, 0
  10039. vaesenc xmm13, xmm13, [rax+48]
  10040. vaesenc xmm14, xmm14, [rax+48]
  10041. vaesenc xmm15, xmm15, [rax+48]
  10042. vpxor xmm1, xmm1, xmm7
  10043. vpxor xmm2, xmm2, xmm7
  10044. vpxor xmm1, xmm1, xmm6
  10045. vpxor xmm3, xmm3, xmm6
  10046. vpxor xmm1, xmm1, xmm4
  10047. vmovdqu xmm7, OWORD PTR [rsp+64]
  10048. vmovdqu xmm0, OWORD PTR [rdx+-80]
  10049. vpshufd xmm4, xmm7, 78
  10050. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10051. vaesenc xmm8, xmm8, [rax+64]
  10052. vpxor xmm4, xmm4, xmm7
  10053. vpshufd xmm5, xmm0, 78
  10054. vpxor xmm5, xmm5, xmm0
  10055. vpclmulqdq xmm6, xmm0, xmm7, 17
  10056. vaesenc xmm9, xmm9, [rax+64]
  10057. vaesenc xmm10, xmm10, [rax+64]
  10058. vpclmulqdq xmm7, xmm0, xmm7, 0
  10059. vaesenc xmm11, xmm11, [rax+64]
  10060. vaesenc xmm12, xmm12, [rax+64]
  10061. vpclmulqdq xmm4, xmm4, xmm5, 0
  10062. vaesenc xmm13, xmm13, [rax+64]
  10063. vaesenc xmm14, xmm14, [rax+64]
  10064. vaesenc xmm15, xmm15, [rax+64]
  10065. vpxor xmm1, xmm1, xmm7
  10066. vpxor xmm2, xmm2, xmm7
  10067. vpxor xmm1, xmm1, xmm6
  10068. vpxor xmm3, xmm3, xmm6
  10069. vpxor xmm1, xmm1, xmm4
  10070. vmovdqu xmm7, OWORD PTR [rsp+48]
  10071. vmovdqu xmm0, OWORD PTR [rdx+-64]
  10072. vpshufd xmm4, xmm7, 78
  10073. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10074. vaesenc xmm8, xmm8, [rax+80]
  10075. vpxor xmm4, xmm4, xmm7
  10076. vpshufd xmm5, xmm0, 78
  10077. vpxor xmm5, xmm5, xmm0
  10078. vpclmulqdq xmm6, xmm0, xmm7, 17
  10079. vaesenc xmm9, xmm9, [rax+80]
  10080. vaesenc xmm10, xmm10, [rax+80]
  10081. vpclmulqdq xmm7, xmm0, xmm7, 0
  10082. vaesenc xmm11, xmm11, [rax+80]
  10083. vaesenc xmm12, xmm12, [rax+80]
  10084. vpclmulqdq xmm4, xmm4, xmm5, 0
  10085. vaesenc xmm13, xmm13, [rax+80]
  10086. vaesenc xmm14, xmm14, [rax+80]
  10087. vaesenc xmm15, xmm15, [rax+80]
  10088. vpxor xmm1, xmm1, xmm7
  10089. vpxor xmm2, xmm2, xmm7
  10090. vpxor xmm1, xmm1, xmm6
  10091. vpxor xmm3, xmm3, xmm6
  10092. vpxor xmm1, xmm1, xmm4
  10093. vmovdqu xmm7, OWORD PTR [rsp+32]
  10094. vmovdqu xmm0, OWORD PTR [rdx+-48]
  10095. vpshufd xmm4, xmm7, 78
  10096. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10097. vaesenc xmm8, xmm8, [rax+96]
  10098. vpxor xmm4, xmm4, xmm7
  10099. vpshufd xmm5, xmm0, 78
  10100. vpxor xmm5, xmm5, xmm0
  10101. vpclmulqdq xmm6, xmm0, xmm7, 17
  10102. vaesenc xmm9, xmm9, [rax+96]
  10103. vaesenc xmm10, xmm10, [rax+96]
  10104. vpclmulqdq xmm7, xmm0, xmm7, 0
  10105. vaesenc xmm11, xmm11, [rax+96]
  10106. vaesenc xmm12, xmm12, [rax+96]
  10107. vpclmulqdq xmm4, xmm4, xmm5, 0
  10108. vaesenc xmm13, xmm13, [rax+96]
  10109. vaesenc xmm14, xmm14, [rax+96]
  10110. vaesenc xmm15, xmm15, [rax+96]
  10111. vpxor xmm1, xmm1, xmm7
  10112. vpxor xmm2, xmm2, xmm7
  10113. vpxor xmm1, xmm1, xmm6
  10114. vpxor xmm3, xmm3, xmm6
  10115. vpxor xmm1, xmm1, xmm4
  10116. vmovdqu xmm7, OWORD PTR [rsp+16]
  10117. vmovdqu xmm0, OWORD PTR [rdx+-32]
  10118. vpshufd xmm4, xmm7, 78
  10119. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10120. vaesenc xmm8, xmm8, [rax+112]
  10121. vpxor xmm4, xmm4, xmm7
  10122. vpshufd xmm5, xmm0, 78
  10123. vpxor xmm5, xmm5, xmm0
  10124. vpclmulqdq xmm6, xmm0, xmm7, 17
  10125. vaesenc xmm9, xmm9, [rax+112]
  10126. vaesenc xmm10, xmm10, [rax+112]
  10127. vpclmulqdq xmm7, xmm0, xmm7, 0
  10128. vaesenc xmm11, xmm11, [rax+112]
  10129. vaesenc xmm12, xmm12, [rax+112]
  10130. vpclmulqdq xmm4, xmm4, xmm5, 0
  10131. vaesenc xmm13, xmm13, [rax+112]
  10132. vaesenc xmm14, xmm14, [rax+112]
  10133. vaesenc xmm15, xmm15, [rax+112]
  10134. vpxor xmm1, xmm1, xmm7
  10135. vpxor xmm2, xmm2, xmm7
  10136. vpxor xmm1, xmm1, xmm6
  10137. vpxor xmm3, xmm3, xmm6
  10138. vpxor xmm1, xmm1, xmm4
  10139. vmovdqu xmm7, OWORD PTR [rsp]
  10140. vmovdqu xmm0, OWORD PTR [rdx+-16]
  10141. vpshufd xmm4, xmm7, 78
  10142. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10143. vaesenc xmm8, xmm8, [rax+128]
  10144. vpxor xmm4, xmm4, xmm7
  10145. vpshufd xmm5, xmm0, 78
  10146. vpxor xmm5, xmm5, xmm0
  10147. vpclmulqdq xmm6, xmm0, xmm7, 17
  10148. vaesenc xmm9, xmm9, [rax+128]
  10149. vaesenc xmm10, xmm10, [rax+128]
  10150. vpclmulqdq xmm7, xmm0, xmm7, 0
  10151. vaesenc xmm11, xmm11, [rax+128]
  10152. vaesenc xmm12, xmm12, [rax+128]
  10153. vpclmulqdq xmm4, xmm4, xmm5, 0
  10154. vaesenc xmm13, xmm13, [rax+128]
  10155. vaesenc xmm14, xmm14, [rax+128]
  10156. vaesenc xmm15, xmm15, [rax+128]
  10157. vpxor xmm1, xmm1, xmm7
  10158. vpxor xmm2, xmm2, xmm7
  10159. vpxor xmm1, xmm1, xmm6
  10160. vpxor xmm3, xmm3, xmm6
  10161. vpxor xmm1, xmm1, xmm4
  10162. vpslldq xmm5, xmm1, 8
  10163. vpsrldq xmm1, xmm1, 8
  10164. vaesenc xmm8, xmm8, [rax+144]
  10165. vpxor xmm2, xmm2, xmm5
  10166. vpxor xmm3, xmm3, xmm1
  10167. vaesenc xmm9, xmm9, [rax+144]
  10168. vpslld xmm7, xmm2, 31
  10169. vpslld xmm4, xmm2, 30
  10170. vpslld xmm5, xmm2, 25
  10171. vaesenc xmm10, xmm10, [rax+144]
  10172. vpxor xmm7, xmm7, xmm4
  10173. vpxor xmm7, xmm7, xmm5
  10174. vaesenc xmm11, xmm11, [rax+144]
  10175. vpsrldq xmm4, xmm7, 4
  10176. vpslldq xmm7, xmm7, 12
  10177. vaesenc xmm12, xmm12, [rax+144]
  10178. vpxor xmm2, xmm2, xmm7
  10179. vpsrld xmm5, xmm2, 1
  10180. vaesenc xmm13, xmm13, [rax+144]
  10181. vpsrld xmm1, xmm2, 2
  10182. vpsrld xmm0, xmm2, 7
  10183. vaesenc xmm14, xmm14, [rax+144]
  10184. vpxor xmm5, xmm5, xmm1
  10185. vpxor xmm5, xmm5, xmm0
  10186. vaesenc xmm15, xmm15, [rax+144]
  10187. vpxor xmm5, xmm5, xmm4
  10188. vpxor xmm2, xmm2, xmm5
  10189. vpxor xmm2, xmm2, xmm3
  10190. cmp r8d, 11
  10191. vmovdqa xmm7, OWORD PTR [rax+160]
  10192. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done
  10193. vaesenc xmm8, xmm8, xmm7
  10194. vaesenc xmm9, xmm9, xmm7
  10195. vaesenc xmm10, xmm10, xmm7
  10196. vaesenc xmm11, xmm11, xmm7
  10197. vaesenc xmm12, xmm12, xmm7
  10198. vaesenc xmm13, xmm13, xmm7
  10199. vaesenc xmm14, xmm14, xmm7
  10200. vaesenc xmm15, xmm15, xmm7
  10201. vmovdqa xmm7, OWORD PTR [rax+176]
  10202. vaesenc xmm8, xmm8, xmm7
  10203. vaesenc xmm9, xmm9, xmm7
  10204. vaesenc xmm10, xmm10, xmm7
  10205. vaesenc xmm11, xmm11, xmm7
  10206. vaesenc xmm12, xmm12, xmm7
  10207. vaesenc xmm13, xmm13, xmm7
  10208. vaesenc xmm14, xmm14, xmm7
  10209. vaesenc xmm15, xmm15, xmm7
  10210. cmp r8d, 13
  10211. vmovdqa xmm7, OWORD PTR [rax+192]
  10212. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done
  10213. vaesenc xmm8, xmm8, xmm7
  10214. vaesenc xmm9, xmm9, xmm7
  10215. vaesenc xmm10, xmm10, xmm7
  10216. vaesenc xmm11, xmm11, xmm7
  10217. vaesenc xmm12, xmm12, xmm7
  10218. vaesenc xmm13, xmm13, xmm7
  10219. vaesenc xmm14, xmm14, xmm7
  10220. vaesenc xmm15, xmm15, xmm7
  10221. vmovdqa xmm7, OWORD PTR [rax+208]
  10222. vaesenc xmm8, xmm8, xmm7
  10223. vaesenc xmm9, xmm9, xmm7
  10224. vaesenc xmm10, xmm10, xmm7
  10225. vaesenc xmm11, xmm11, xmm7
  10226. vaesenc xmm12, xmm12, xmm7
  10227. vaesenc xmm13, xmm13, xmm7
  10228. vaesenc xmm14, xmm14, xmm7
  10229. vaesenc xmm15, xmm15, xmm7
  10230. vmovdqa xmm7, OWORD PTR [rax+224]
  10231. L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done:
  10232. vaesenclast xmm8, xmm8, xmm7
  10233. vaesenclast xmm9, xmm9, xmm7
  10234. vmovdqu xmm0, OWORD PTR [rcx]
  10235. vmovdqu xmm1, OWORD PTR [rcx+16]
  10236. vpxor xmm8, xmm8, xmm0
  10237. vpxor xmm9, xmm9, xmm1
  10238. vmovdqu OWORD PTR [rdx], xmm8
  10239. vmovdqu OWORD PTR [rdx+16], xmm9
  10240. vaesenclast xmm10, xmm10, xmm7
  10241. vaesenclast xmm11, xmm11, xmm7
  10242. vmovdqu xmm0, OWORD PTR [rcx+32]
  10243. vmovdqu xmm1, OWORD PTR [rcx+48]
  10244. vpxor xmm10, xmm10, xmm0
  10245. vpxor xmm11, xmm11, xmm1
  10246. vmovdqu OWORD PTR [rdx+32], xmm10
  10247. vmovdqu OWORD PTR [rdx+48], xmm11
  10248. vaesenclast xmm12, xmm12, xmm7
  10249. vaesenclast xmm13, xmm13, xmm7
  10250. vmovdqu xmm0, OWORD PTR [rcx+64]
  10251. vmovdqu xmm1, OWORD PTR [rcx+80]
  10252. vpxor xmm12, xmm12, xmm0
  10253. vpxor xmm13, xmm13, xmm1
  10254. vmovdqu OWORD PTR [rdx+64], xmm12
  10255. vmovdqu OWORD PTR [rdx+80], xmm13
  10256. vaesenclast xmm14, xmm14, xmm7
  10257. vaesenclast xmm15, xmm15, xmm7
  10258. vmovdqu xmm0, OWORD PTR [rcx+96]
  10259. vmovdqu xmm1, OWORD PTR [rcx+112]
  10260. vpxor xmm14, xmm14, xmm0
  10261. vpxor xmm15, xmm15, xmm1
  10262. vmovdqu OWORD PTR [rdx+96], xmm14
  10263. vmovdqu OWORD PTR [rdx+112], xmm15
  10264. add edi, 128
  10265. cmp edi, r13d
  10266. jl L_AES_GCM_encrypt_update_avx1_ghash_128
  10267. L_AES_GCM_encrypt_update_avx1_end_128:
  10268. vmovdqa xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10269. vpshufb xmm8, xmm8, xmm4
  10270. vpshufb xmm9, xmm9, xmm4
  10271. vpshufb xmm10, xmm10, xmm4
  10272. vpshufb xmm11, xmm11, xmm4
  10273. vpxor xmm8, xmm8, xmm2
  10274. vpshufb xmm12, xmm12, xmm4
  10275. vpshufb xmm13, xmm13, xmm4
  10276. vpshufb xmm14, xmm14, xmm4
  10277. vpshufb xmm15, xmm15, xmm4
  10278. vmovdqu xmm7, OWORD PTR [rsp]
  10279. vmovdqu xmm5, OWORD PTR [rsp+16]
  10280. ; ghash_gfmul_avx
  10281. vpshufd xmm1, xmm15, 78
  10282. vpshufd xmm2, xmm7, 78
  10283. vpclmulqdq xmm3, xmm7, xmm15, 17
  10284. vpclmulqdq xmm0, xmm7, xmm15, 0
  10285. vpxor xmm1, xmm1, xmm15
  10286. vpxor xmm2, xmm2, xmm7
  10287. vpclmulqdq xmm1, xmm1, xmm2, 0
  10288. vpxor xmm1, xmm1, xmm0
  10289. vpxor xmm1, xmm1, xmm3
  10290. vmovdqa xmm4, xmm0
  10291. vmovdqa xmm6, xmm3
  10292. vpslldq xmm2, xmm1, 8
  10293. vpsrldq xmm1, xmm1, 8
  10294. vpxor xmm4, xmm4, xmm2
  10295. vpxor xmm6, xmm6, xmm1
  10296. ; ghash_gfmul_xor_avx
  10297. vpshufd xmm1, xmm14, 78
  10298. vpshufd xmm2, xmm5, 78
  10299. vpclmulqdq xmm3, xmm5, xmm14, 17
  10300. vpclmulqdq xmm0, xmm5, xmm14, 0
  10301. vpxor xmm1, xmm1, xmm14
  10302. vpxor xmm2, xmm2, xmm5
  10303. vpclmulqdq xmm1, xmm1, xmm2, 0
  10304. vpxor xmm1, xmm1, xmm0
  10305. vpxor xmm1, xmm1, xmm3
  10306. vpxor xmm4, xmm4, xmm0
  10307. vpxor xmm6, xmm6, xmm3
  10308. vpslldq xmm2, xmm1, 8
  10309. vpsrldq xmm1, xmm1, 8
  10310. vpxor xmm4, xmm4, xmm2
  10311. vpxor xmm6, xmm6, xmm1
  10312. vmovdqu xmm7, OWORD PTR [rsp+32]
  10313. vmovdqu xmm5, OWORD PTR [rsp+48]
  10314. ; ghash_gfmul_xor_avx
  10315. vpshufd xmm1, xmm13, 78
  10316. vpshufd xmm2, xmm7, 78
  10317. vpclmulqdq xmm3, xmm7, xmm13, 17
  10318. vpclmulqdq xmm0, xmm7, xmm13, 0
  10319. vpxor xmm1, xmm1, xmm13
  10320. vpxor xmm2, xmm2, xmm7
  10321. vpclmulqdq xmm1, xmm1, xmm2, 0
  10322. vpxor xmm1, xmm1, xmm0
  10323. vpxor xmm1, xmm1, xmm3
  10324. vpxor xmm4, xmm4, xmm0
  10325. vpxor xmm6, xmm6, xmm3
  10326. vpslldq xmm2, xmm1, 8
  10327. vpsrldq xmm1, xmm1, 8
  10328. vpxor xmm4, xmm4, xmm2
  10329. vpxor xmm6, xmm6, xmm1
  10330. ; ghash_gfmul_xor_avx
  10331. vpshufd xmm1, xmm12, 78
  10332. vpshufd xmm2, xmm5, 78
  10333. vpclmulqdq xmm3, xmm5, xmm12, 17
  10334. vpclmulqdq xmm0, xmm5, xmm12, 0
  10335. vpxor xmm1, xmm1, xmm12
  10336. vpxor xmm2, xmm2, xmm5
  10337. vpclmulqdq xmm1, xmm1, xmm2, 0
  10338. vpxor xmm1, xmm1, xmm0
  10339. vpxor xmm1, xmm1, xmm3
  10340. vpxor xmm4, xmm4, xmm0
  10341. vpxor xmm6, xmm6, xmm3
  10342. vpslldq xmm2, xmm1, 8
  10343. vpsrldq xmm1, xmm1, 8
  10344. vpxor xmm4, xmm4, xmm2
  10345. vpxor xmm6, xmm6, xmm1
  10346. vmovdqu xmm7, OWORD PTR [rsp+64]
  10347. vmovdqu xmm5, OWORD PTR [rsp+80]
  10348. ; ghash_gfmul_xor_avx
  10349. vpshufd xmm1, xmm11, 78
  10350. vpshufd xmm2, xmm7, 78
  10351. vpclmulqdq xmm3, xmm7, xmm11, 17
  10352. vpclmulqdq xmm0, xmm7, xmm11, 0
  10353. vpxor xmm1, xmm1, xmm11
  10354. vpxor xmm2, xmm2, xmm7
  10355. vpclmulqdq xmm1, xmm1, xmm2, 0
  10356. vpxor xmm1, xmm1, xmm0
  10357. vpxor xmm1, xmm1, xmm3
  10358. vpxor xmm4, xmm4, xmm0
  10359. vpxor xmm6, xmm6, xmm3
  10360. vpslldq xmm2, xmm1, 8
  10361. vpsrldq xmm1, xmm1, 8
  10362. vpxor xmm4, xmm4, xmm2
  10363. vpxor xmm6, xmm6, xmm1
  10364. ; ghash_gfmul_xor_avx
  10365. vpshufd xmm1, xmm10, 78
  10366. vpshufd xmm2, xmm5, 78
  10367. vpclmulqdq xmm3, xmm5, xmm10, 17
  10368. vpclmulqdq xmm0, xmm5, xmm10, 0
  10369. vpxor xmm1, xmm1, xmm10
  10370. vpxor xmm2, xmm2, xmm5
  10371. vpclmulqdq xmm1, xmm1, xmm2, 0
  10372. vpxor xmm1, xmm1, xmm0
  10373. vpxor xmm1, xmm1, xmm3
  10374. vpxor xmm4, xmm4, xmm0
  10375. vpxor xmm6, xmm6, xmm3
  10376. vpslldq xmm2, xmm1, 8
  10377. vpsrldq xmm1, xmm1, 8
  10378. vpxor xmm4, xmm4, xmm2
  10379. vpxor xmm6, xmm6, xmm1
  10380. vmovdqu xmm7, OWORD PTR [rsp+96]
  10381. vmovdqu xmm5, OWORD PTR [rsp+112]
  10382. ; ghash_gfmul_xor_avx
  10383. vpshufd xmm1, xmm9, 78
  10384. vpshufd xmm2, xmm7, 78
  10385. vpclmulqdq xmm3, xmm7, xmm9, 17
  10386. vpclmulqdq xmm0, xmm7, xmm9, 0
  10387. vpxor xmm1, xmm1, xmm9
  10388. vpxor xmm2, xmm2, xmm7
  10389. vpclmulqdq xmm1, xmm1, xmm2, 0
  10390. vpxor xmm1, xmm1, xmm0
  10391. vpxor xmm1, xmm1, xmm3
  10392. vpxor xmm4, xmm4, xmm0
  10393. vpxor xmm6, xmm6, xmm3
  10394. vpslldq xmm2, xmm1, 8
  10395. vpsrldq xmm1, xmm1, 8
  10396. vpxor xmm4, xmm4, xmm2
  10397. vpxor xmm6, xmm6, xmm1
  10398. ; ghash_gfmul_xor_avx
  10399. vpshufd xmm1, xmm8, 78
  10400. vpshufd xmm2, xmm5, 78
  10401. vpclmulqdq xmm3, xmm5, xmm8, 17
  10402. vpclmulqdq xmm0, xmm5, xmm8, 0
  10403. vpxor xmm1, xmm1, xmm8
  10404. vpxor xmm2, xmm2, xmm5
  10405. vpclmulqdq xmm1, xmm1, xmm2, 0
  10406. vpxor xmm1, xmm1, xmm0
  10407. vpxor xmm1, xmm1, xmm3
  10408. vpxor xmm4, xmm4, xmm0
  10409. vpxor xmm6, xmm6, xmm3
  10410. vpslldq xmm2, xmm1, 8
  10411. vpsrldq xmm1, xmm1, 8
  10412. vpxor xmm4, xmm4, xmm2
  10413. vpxor xmm6, xmm6, xmm1
  10414. vpslld xmm0, xmm4, 31
  10415. vpslld xmm1, xmm4, 30
  10416. vpslld xmm2, xmm4, 25
  10417. vpxor xmm0, xmm0, xmm1
  10418. vpxor xmm0, xmm0, xmm2
  10419. vmovdqa xmm1, xmm0
  10420. vpsrldq xmm1, xmm1, 4
  10421. vpslldq xmm0, xmm0, 12
  10422. vpxor xmm4, xmm4, xmm0
  10423. vpsrld xmm2, xmm4, 1
  10424. vpsrld xmm3, xmm4, 2
  10425. vpsrld xmm0, xmm4, 7
  10426. vpxor xmm2, xmm2, xmm3
  10427. vpxor xmm2, xmm2, xmm0
  10428. vpxor xmm2, xmm2, xmm1
  10429. vpxor xmm2, xmm2, xmm4
  10430. vpxor xmm6, xmm6, xmm2
  10431. vmovdqu xmm5, OWORD PTR [rsp]
  10432. L_AES_GCM_encrypt_update_avx1_done_128:
  10433. mov edx, r9d
  10434. cmp edi, edx
  10435. jge L_AES_GCM_encrypt_update_avx1_done_enc
  10436. mov r13d, r9d
  10437. and r13d, 4294967280
  10438. cmp edi, r13d
  10439. jge L_AES_GCM_encrypt_update_avx1_last_block_done
  10440. vmovdqu xmm9, OWORD PTR [r15]
  10441. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  10442. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  10443. vmovdqu OWORD PTR [r15], xmm9
  10444. vpxor xmm8, xmm8, [rax]
  10445. vaesenc xmm8, xmm8, [rax+16]
  10446. vaesenc xmm8, xmm8, [rax+32]
  10447. vaesenc xmm8, xmm8, [rax+48]
  10448. vaesenc xmm8, xmm8, [rax+64]
  10449. vaesenc xmm8, xmm8, [rax+80]
  10450. vaesenc xmm8, xmm8, [rax+96]
  10451. vaesenc xmm8, xmm8, [rax+112]
  10452. vaesenc xmm8, xmm8, [rax+128]
  10453. vaesenc xmm8, xmm8, [rax+144]
  10454. cmp r8d, 11
  10455. vmovdqa xmm9, OWORD PTR [rax+160]
  10456. jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last
  10457. vaesenc xmm8, xmm8, xmm9
  10458. vaesenc xmm8, xmm8, [rax+176]
  10459. cmp r8d, 13
  10460. vmovdqa xmm9, OWORD PTR [rax+192]
  10461. jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last
  10462. vaesenc xmm8, xmm8, xmm9
  10463. vaesenc xmm8, xmm8, [rax+208]
  10464. vmovdqa xmm9, OWORD PTR [rax+224]
  10465. L_AES_GCM_encrypt_update_avx1_aesenc_block_last:
  10466. vaesenclast xmm8, xmm8, xmm9
  10467. vmovdqu xmm9, OWORD PTR [r11+rdi]
  10468. vpxor xmm8, xmm8, xmm9
  10469. vmovdqu OWORD PTR [r10+rdi], xmm8
  10470. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10471. vpxor xmm6, xmm6, xmm8
  10472. add edi, 16
  10473. cmp edi, r13d
  10474. jge L_AES_GCM_encrypt_update_avx1_last_block_ghash
  10475. L_AES_GCM_encrypt_update_avx1_last_block_start:
  10476. vmovdqu xmm13, OWORD PTR [r11+rdi]
  10477. vmovdqu xmm9, OWORD PTR [r15]
  10478. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  10479. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  10480. vmovdqu OWORD PTR [r15], xmm9
  10481. vpxor xmm8, xmm8, [rax]
  10482. vpclmulqdq xmm10, xmm6, xmm5, 16
  10483. vaesenc xmm8, xmm8, [rax+16]
  10484. vaesenc xmm8, xmm8, [rax+32]
  10485. vpclmulqdq xmm11, xmm6, xmm5, 1
  10486. vaesenc xmm8, xmm8, [rax+48]
  10487. vaesenc xmm8, xmm8, [rax+64]
  10488. vpclmulqdq xmm12, xmm6, xmm5, 0
  10489. vaesenc xmm8, xmm8, [rax+80]
  10490. vpclmulqdq xmm1, xmm6, xmm5, 17
  10491. vaesenc xmm8, xmm8, [rax+96]
  10492. vpxor xmm10, xmm10, xmm11
  10493. vpslldq xmm2, xmm10, 8
  10494. vpsrldq xmm10, xmm10, 8
  10495. vaesenc xmm8, xmm8, [rax+112]
  10496. vpxor xmm2, xmm2, xmm12
  10497. vpxor xmm3, xmm1, xmm10
  10498. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  10499. vpclmulqdq xmm11, xmm2, xmm0, 16
  10500. vaesenc xmm8, xmm8, [rax+128]
  10501. vpshufd xmm10, xmm2, 78
  10502. vpxor xmm10, xmm10, xmm11
  10503. vpclmulqdq xmm11, xmm10, xmm0, 16
  10504. vaesenc xmm8, xmm8, [rax+144]
  10505. vpshufd xmm10, xmm10, 78
  10506. vpxor xmm10, xmm10, xmm11
  10507. vpxor xmm6, xmm10, xmm3
  10508. cmp r8d, 11
  10509. vmovdqa xmm9, OWORD PTR [rax+160]
  10510. jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last
  10511. vaesenc xmm8, xmm8, xmm9
  10512. vaesenc xmm8, xmm8, [rax+176]
  10513. cmp r8d, 13
  10514. vmovdqa xmm9, OWORD PTR [rax+192]
  10515. jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last
  10516. vaesenc xmm8, xmm8, xmm9
  10517. vaesenc xmm8, xmm8, [rax+208]
  10518. vmovdqa xmm9, OWORD PTR [rax+224]
  10519. L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last:
  10520. vaesenclast xmm8, xmm8, xmm9
  10521. vmovdqa xmm0, xmm13
  10522. vpxor xmm8, xmm8, xmm0
  10523. vmovdqu OWORD PTR [r10+rdi], xmm8
  10524. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10525. add edi, 16
  10526. vpxor xmm6, xmm6, xmm8
  10527. cmp edi, r13d
  10528. jl L_AES_GCM_encrypt_update_avx1_last_block_start
  10529. L_AES_GCM_encrypt_update_avx1_last_block_ghash:
  10530. ; ghash_gfmul_red_avx
  10531. vpshufd xmm9, xmm5, 78
  10532. vpshufd xmm10, xmm6, 78
  10533. vpclmulqdq xmm11, xmm6, xmm5, 17
  10534. vpclmulqdq xmm8, xmm6, xmm5, 0
  10535. vpxor xmm9, xmm9, xmm5
  10536. vpxor xmm10, xmm10, xmm6
  10537. vpclmulqdq xmm9, xmm9, xmm10, 0
  10538. vpxor xmm9, xmm9, xmm8
  10539. vpxor xmm9, xmm9, xmm11
  10540. vpslldq xmm10, xmm9, 8
  10541. vpsrldq xmm9, xmm9, 8
  10542. vpxor xmm8, xmm8, xmm10
  10543. vpxor xmm6, xmm11, xmm9
  10544. vpslld xmm12, xmm8, 31
  10545. vpslld xmm13, xmm8, 30
  10546. vpslld xmm14, xmm8, 25
  10547. vpxor xmm12, xmm12, xmm13
  10548. vpxor xmm12, xmm12, xmm14
  10549. vpsrldq xmm13, xmm12, 4
  10550. vpslldq xmm12, xmm12, 12
  10551. vpxor xmm8, xmm8, xmm12
  10552. vpsrld xmm14, xmm8, 1
  10553. vpsrld xmm10, xmm8, 2
  10554. vpsrld xmm9, xmm8, 7
  10555. vpxor xmm14, xmm14, xmm10
  10556. vpxor xmm14, xmm14, xmm9
  10557. vpxor xmm14, xmm14, xmm13
  10558. vpxor xmm14, xmm14, xmm8
  10559. vpxor xmm6, xmm6, xmm14
  10560. L_AES_GCM_encrypt_update_avx1_last_block_done:
  10561. L_AES_GCM_encrypt_update_avx1_done_enc:
  10562. vmovdqa OWORD PTR [r12], xmm6
  10563. vzeroupper
  10564. vmovdqu xmm6, OWORD PTR [rsp+160]
  10565. vmovdqu xmm7, OWORD PTR [rsp+176]
  10566. vmovdqu xmm8, OWORD PTR [rsp+192]
  10567. vmovdqu xmm9, OWORD PTR [rsp+208]
  10568. vmovdqu xmm10, OWORD PTR [rsp+224]
  10569. vmovdqu xmm11, OWORD PTR [rsp+240]
  10570. vmovdqu xmm12, OWORD PTR [rsp+256]
  10571. vmovdqu xmm13, OWORD PTR [rsp+272]
  10572. vmovdqu xmm14, OWORD PTR [rsp+288]
  10573. vmovdqu xmm15, OWORD PTR [rsp+304]
  10574. add rsp, 320
  10575. pop rdi
  10576. pop r15
  10577. pop r14
  10578. pop r12
  10579. pop r13
  10580. ret
  10581. AES_GCM_encrypt_update_avx1 ENDP
  10582. _text ENDS
  10583. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_encrypt_final_avx1
  ;
  ; Folds a 128-bit block of bit lengths into the running hash value,
  ; multiplies the result once by the (pre-shifted) hash key with the
  ; carry-less multiply + shift/XOR reduction below, XORs a caller-supplied
  ; 16-byte block into the byte-shuffled product, and stores the leading
  ; bytes of that value to the output buffer.
  ;
  ; Register and stack usage is consistent with the Microsoft x64 calling
  ; convention (xmm6-xmm13 are saved/restored; stack arguments are read from
  ; above the return address plus 32 bytes). Argument roles as used here:
  ;   rcx      -> 16-byte block read with an aligned load (copied to rax);
  ;               presumably the accumulated GHASH state - TODO confirm
  ;   rdx      -> output buffer that receives the tag bytes (copied to r9)
  ;   r8d      =  number of tag bytes to store
  ;   r9d      =  byte count; times 8 it becomes the low qword of the
  ;               length block
  ;   stack #5 =  32-bit byte count; times 8 it becomes the high qword of
  ;               the length block
  ;   stack #6 -> 16-byte block read with an aligned load, used as the
  ;               multiplier; presumably the hash key H - TODO confirm
  ;   stack #7 -> 16-byte block read with an aligned load and XORed into
  ;               the final value; presumably the encrypted initial
  ;               counter block - verify against caller
  ;
  ; Local stack frame (after "sub rsp, 144"):
  ;   [rsp+0   .. rsp+15 ]  scratch copy of the tag for the byte-wise store
  ;   [rsp+16  .. rsp+143]  saved xmm6 .. xmm13
  ;
  ; Overwrites rax, rcx, rdx, r9, r10, r11, xmm0, xmm1, xmm4, xmm5 and the
  ; flags; r12, r13, r14 and xmm6-xmm13 are restored before returning.
  ; ---------------------------------------------------------------------
  10584. AES_GCM_encrypt_final_avx1 PROC
  ; Preserve the general-purpose registers that are used as temporaries
  ; below and popped again in the epilogue.
  10585. push r13
  10586. push r12
  10587. push r14
  ; Move the register arguments out of rcx/rdx/r9, which are reused later
  ; as shift temporaries and as the copy-loop index.
  10588. mov rax, rcx
  10589. mov r10d, r9d
  10590. mov r9, rdx
  ; The three pushes lowered rsp by 24 bytes, so offsets 64/72/80 here are
  ; offsets 40/48/56 relative to rsp on entry (5th, 6th and 7th argument).
  10591. mov r11d, DWORD PTR [rsp+64]
  10592. mov r12, QWORD PTR [rsp+72]
  10593. mov r14, QWORD PTR [rsp+80]
  ; Reserve 16 bytes of scratch plus 8 * 16 bytes for xmm6-xmm13.
  10594. sub rsp, 144
  10595. vmovdqu OWORD PTR [rsp+16], xmm6
  10596. vmovdqu OWORD PTR [rsp+32], xmm7
  10597. vmovdqu OWORD PTR [rsp+48], xmm8
  10598. vmovdqu OWORD PTR [rsp+64], xmm9
  10599. vmovdqu OWORD PTR [rsp+80], xmm10
  10600. vmovdqu OWORD PTR [rsp+96], xmm11
  10601. vmovdqu OWORD PTR [rsp+112], xmm12
  10602. vmovdqu OWORD PTR [rsp+128], xmm13
  ; xmm4 = block at arg 1, xmm5 = block at arg 6, xmm6 = block at arg 7.
  ; vmovdqa faults on a misaligned address, so all three pointers must be
  ; 16-byte aligned.
  10603. vmovdqa xmm4, OWORD PTR [rax]
  10604. vmovdqa xmm5, OWORD PTR [r12]
  10605. vmovdqa xmm6, OWORD PTR [r14]
  ; Shift xmm5 left by one bit across the full 128 bits: xmm8 carries bit 63
  ; of the low qword into bit 0 of the high qword, xmm7 holds both qwords
  ; shifted by one.
  10606. vpsrlq xmm8, xmm5, 63
  10607. vpsllq xmm7, xmm5, 1
  10608. vpslldq xmm8, xmm8, 8
  10609. vpor xmm7, xmm7, xmm8
  ; Broadcast the top dword and arithmetic-shift it to build an all-ones /
  ; all-zeros mask from original bit 127; the mask selects whether the
  ; L_avx1_aes_gcm_mod2_128 constant is XORed into the shifted value.
  10610. vpshufd xmm5, xmm5, 255
  10611. vpsrad xmm5, xmm5, 31
  10612. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  10613. vpxor xmm5, xmm5, xmm7
  ; Build the length block. The 32-bit moves zero-extend into rdx/rcx, so
  ; the 64-bit shifts convert byte counts to bit counts without losing the
  ; top bits.
  10614. mov edx, r10d
  10615. mov ecx, r11d
  10616. shl rdx, 3
  10617. shl rcx, 3
  10618. vmovq xmm0, rdx
  10619. vmovq xmm1, rcx
  ; xmm0 = { high qword: stack-arg-5 bits, low qword: r9d-arg bits }.
  10620. vpunpcklqdq xmm0, xmm0, xmm1
  ; Fold the length block into the running value before the last multiply.
  10621. vpxor xmm4, xmm4, xmm0
  ; Multiply xmm4 by xmm5 with three carry-less multiplies (Karatsuba
  ; split): xmm10 = high*high, xmm7 = low*low, xmm8 = product of the XORed
  ; halves, from which the two outer products are removed to obtain the
  ; middle term.
  10622. ; ghash_gfmul_red_avx
  10623. vpshufd xmm8, xmm5, 78
  10624. vpshufd xmm9, xmm4, 78
  10625. vpclmulqdq xmm10, xmm4, xmm5, 17
  10626. vpclmulqdq xmm7, xmm4, xmm5, 0
  10627. vpxor xmm8, xmm8, xmm5
  10628. vpxor xmm9, xmm9, xmm4
  10629. vpclmulqdq xmm8, xmm8, xmm9, 0
  10630. vpxor xmm8, xmm8, xmm7
  10631. vpxor xmm8, xmm8, xmm10
  ; Split the middle term across the two halves of the 256-bit product:
  ; xmm7 = low 128 bits, xmm4 = high 128 bits.
  10632. vpslldq xmm9, xmm8, 8
  10633. vpsrldq xmm8, xmm8, 8
  10634. vpxor xmm7, xmm7, xmm9
  10635. vpxor xmm4, xmm10, xmm8
  ; Reduction of the low half into the high half (per the macro name above):
  ; first pass uses left shifts by 31, 30 and 25 on each dword ...
  10636. vpslld xmm11, xmm7, 31
  10637. vpslld xmm12, xmm7, 30
  10638. vpslld xmm13, xmm7, 25
  10639. vpxor xmm11, xmm11, xmm12
  10640. vpxor xmm11, xmm11, xmm13
  ; xmm12 keeps the part that moves down one dword; xmm11 keeps the part
  ; that moves up into the top dword and is folded back into xmm7.
  10641. vpsrldq xmm12, xmm11, 4
  10642. vpslldq xmm11, xmm11, 12
  10643. vpxor xmm7, xmm7, xmm11
  ; ... second pass uses right shifts by 1, 2 and 7, then combines them with
  ; the carried part (xmm12) and the adjusted low half (xmm7).
  10644. vpsrld xmm13, xmm7, 1
  10645. vpsrld xmm9, xmm7, 2
  10646. vpsrld xmm8, xmm7, 7
  10647. vpxor xmm13, xmm13, xmm9
  10648. vpxor xmm13, xmm13, xmm8
  10649. vpxor xmm13, xmm13, xmm12
  10650. vpxor xmm13, xmm13, xmm7
  ; xmm4 = reduced 128-bit product.
  10651. vpxor xmm4, xmm4, xmm13
  ; Reorder the bytes with the L_avx1_aes_gcm_bswap_mask shuffle constant
  ; (defined elsewhere; presumably a full byte reversal), then XOR in the
  ; block loaded from arg 7 to form the value that is stored.
  10652. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10653. vpxor xmm0, xmm4, xmm6
  ; Fast path: a full 16-byte tag is written with a single unaligned store.
  10654. cmp r8d, 16
  10655. je L_AES_GCM_encrypt_final_avx1_store_tag_16
  ; Otherwise spill the value to the scratch slot and copy r8d bytes one at
  ; a time; rcx is the byte index.
  10656. xor rcx, rcx
  10657. vmovdqu OWORD PTR [rsp], xmm0
  ; NOTE(review): the loop tests its exit condition after the copy, so it
  ; always moves at least one byte. With r8d == 0 the compare would not
  ; match until ecx wraps around, and with r8d > 16 it would read past the
  ; 16-byte scratch slot into the saved-register area. This looks like it
  ; relies on callers passing 1..16 - confirm against the caller.
  10658. L_AES_GCM_encrypt_final_avx1_store_tag_loop:
  10659. movzx r13d, BYTE PTR [rsp+rcx]
  10660. mov BYTE PTR [r9+rcx], r13b
  10661. inc ecx
  10662. cmp ecx, r8d
  10663. jne L_AES_GCM_encrypt_final_avx1_store_tag_loop
  10664. jmp L_AES_GCM_encrypt_final_avx1_store_tag_done
  10665. L_AES_GCM_encrypt_final_avx1_store_tag_16:
  10666. vmovdqu OWORD PTR [r9], xmm0
  10667. L_AES_GCM_encrypt_final_avx1_store_tag_done:
  ; Epilogue: clear the upper YMM halves, restore xmm6-xmm13 from the frame,
  ; release the frame and pop the saved registers in reverse push order.
  10668. vzeroupper
  10669. vmovdqu xmm6, OWORD PTR [rsp+16]
  10670. vmovdqu xmm7, OWORD PTR [rsp+32]
  10671. vmovdqu xmm8, OWORD PTR [rsp+48]
  10672. vmovdqu xmm9, OWORD PTR [rsp+64]
  10673. vmovdqu xmm10, OWORD PTR [rsp+80]
  10674. vmovdqu xmm11, OWORD PTR [rsp+96]
  10675. vmovdqu xmm12, OWORD PTR [rsp+112]
  10676. vmovdqu xmm13, OWORD PTR [rsp+128]
  10677. add rsp, 144
  10678. pop r14
  10679. pop r12
  10680. pop r13
  10681. ret
  10682. AES_GCM_encrypt_final_avx1 ENDP
  10683. _text ENDS
  10684. _text SEGMENT READONLY PARA
  10685. AES_GCM_decrypt_update_avx1 PROC
  10686. push r13
  10687. push r12
  10688. push r14
  10689. push r15
  10690. push rdi
  10691. mov rax, rcx
  10692. mov r10, r8
  10693. mov r8d, edx
  10694. mov r11, r9
  10695. mov r9d, DWORD PTR [rsp+80]
  10696. mov r12, QWORD PTR [rsp+88]
  10697. mov r14, QWORD PTR [rsp+96]
  10698. mov r15, QWORD PTR [rsp+104]
  10699. sub rsp, 328
  10700. vmovdqu OWORD PTR [rsp+168], xmm6
  10701. vmovdqu OWORD PTR [rsp+184], xmm7
  10702. vmovdqu OWORD PTR [rsp+200], xmm8
  10703. vmovdqu OWORD PTR [rsp+216], xmm9
  10704. vmovdqu OWORD PTR [rsp+232], xmm10
  10705. vmovdqu OWORD PTR [rsp+248], xmm11
  10706. vmovdqu OWORD PTR [rsp+264], xmm12
  10707. vmovdqu OWORD PTR [rsp+280], xmm13
  10708. vmovdqu OWORD PTR [rsp+296], xmm14
  10709. vmovdqu OWORD PTR [rsp+312], xmm15
  10710. vmovdqa xmm6, OWORD PTR [r12]
  10711. vmovdqa xmm5, OWORD PTR [r14]
  10712. vpsrlq xmm9, xmm5, 63
  10713. vpsllq xmm8, xmm5, 1
  10714. vpslldq xmm9, xmm9, 8
  10715. vpor xmm8, xmm8, xmm9
  10716. vpshufd xmm5, xmm5, 255
  10717. vpsrad xmm5, xmm5, 31
  10718. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  10719. vpxor xmm5, xmm5, xmm8
  10720. xor edi, edi
  10721. cmp r9d, 128
  10722. mov r13d, r9d
  10723. jl L_AES_GCM_decrypt_update_avx1_done_128
  10724. and r13d, 4294967168
  10725. vmovdqa xmm2, xmm6
  10726. ; H ^ 1
  10727. vmovdqu OWORD PTR [rsp], xmm5
  10728. ; H ^ 2
  10729. vpclmulqdq xmm8, xmm5, xmm5, 0
  10730. vpclmulqdq xmm0, xmm5, xmm5, 17
  10731. vpslld xmm12, xmm8, 31
  10732. vpslld xmm13, xmm8, 30
  10733. vpslld xmm14, xmm8, 25
  10734. vpxor xmm12, xmm12, xmm13
  10735. vpxor xmm12, xmm12, xmm14
  10736. vpsrldq xmm13, xmm12, 4
  10737. vpslldq xmm12, xmm12, 12
  10738. vpxor xmm8, xmm8, xmm12
  10739. vpsrld xmm14, xmm8, 1
  10740. vpsrld xmm10, xmm8, 2
  10741. vpsrld xmm9, xmm8, 7
  10742. vpxor xmm14, xmm14, xmm10
  10743. vpxor xmm14, xmm14, xmm9
  10744. vpxor xmm14, xmm14, xmm13
  10745. vpxor xmm14, xmm14, xmm8
  10746. vpxor xmm0, xmm0, xmm14
  10747. vmovdqu OWORD PTR [rsp+16], xmm0
  10748. ; H ^ 3
  10749. ; ghash_gfmul_red_avx
  10750. vpshufd xmm9, xmm5, 78
  10751. vpshufd xmm10, xmm0, 78
  10752. vpclmulqdq xmm11, xmm0, xmm5, 17
  10753. vpclmulqdq xmm8, xmm0, xmm5, 0
  10754. vpxor xmm9, xmm9, xmm5
  10755. vpxor xmm10, xmm10, xmm0
  10756. vpclmulqdq xmm9, xmm9, xmm10, 0
  10757. vpxor xmm9, xmm9, xmm8
  10758. vpxor xmm9, xmm9, xmm11
  10759. vpslldq xmm10, xmm9, 8
  10760. vpsrldq xmm9, xmm9, 8
  10761. vpxor xmm8, xmm8, xmm10
  10762. vpxor xmm1, xmm11, xmm9
  10763. vpslld xmm12, xmm8, 31
  10764. vpslld xmm13, xmm8, 30
  10765. vpslld xmm14, xmm8, 25
  10766. vpxor xmm12, xmm12, xmm13
  10767. vpxor xmm12, xmm12, xmm14
  10768. vpsrldq xmm13, xmm12, 4
  10769. vpslldq xmm12, xmm12, 12
  10770. vpxor xmm8, xmm8, xmm12
  10771. vpsrld xmm14, xmm8, 1
  10772. vpsrld xmm10, xmm8, 2
  10773. vpsrld xmm9, xmm8, 7
  10774. vpxor xmm14, xmm14, xmm10
  10775. vpxor xmm14, xmm14, xmm9
  10776. vpxor xmm14, xmm14, xmm13
  10777. vpxor xmm14, xmm14, xmm8
  10778. vpxor xmm1, xmm1, xmm14
  10779. vmovdqu OWORD PTR [rsp+32], xmm1
  10780. ; H ^ 4
  10781. vpclmulqdq xmm8, xmm0, xmm0, 0
  10782. vpclmulqdq xmm3, xmm0, xmm0, 17
  10783. vpslld xmm12, xmm8, 31
  10784. vpslld xmm13, xmm8, 30
  10785. vpslld xmm14, xmm8, 25
  10786. vpxor xmm12, xmm12, xmm13
  10787. vpxor xmm12, xmm12, xmm14
  10788. vpsrldq xmm13, xmm12, 4
  10789. vpslldq xmm12, xmm12, 12
  10790. vpxor xmm8, xmm8, xmm12
  10791. vpsrld xmm14, xmm8, 1
  10792. vpsrld xmm10, xmm8, 2
  10793. vpsrld xmm9, xmm8, 7
  10794. vpxor xmm14, xmm14, xmm10
  10795. vpxor xmm14, xmm14, xmm9
  10796. vpxor xmm14, xmm14, xmm13
  10797. vpxor xmm14, xmm14, xmm8
  10798. vpxor xmm3, xmm3, xmm14
  10799. vmovdqu OWORD PTR [rsp+48], xmm3
  10800. ; H ^ 5
  10801. ; ghash_gfmul_red_avx
  10802. vpshufd xmm9, xmm0, 78
  10803. vpshufd xmm10, xmm1, 78
  10804. vpclmulqdq xmm11, xmm1, xmm0, 17
  10805. vpclmulqdq xmm8, xmm1, xmm0, 0
  10806. vpxor xmm9, xmm9, xmm0
  10807. vpxor xmm10, xmm10, xmm1
  10808. vpclmulqdq xmm9, xmm9, xmm10, 0
  10809. vpxor xmm9, xmm9, xmm8
  10810. vpxor xmm9, xmm9, xmm11
  10811. vpslldq xmm10, xmm9, 8
  10812. vpsrldq xmm9, xmm9, 8
  10813. vpxor xmm8, xmm8, xmm10
  10814. vpxor xmm7, xmm11, xmm9
  10815. vpslld xmm12, xmm8, 31
  10816. vpslld xmm13, xmm8, 30
  10817. vpslld xmm14, xmm8, 25
  10818. vpxor xmm12, xmm12, xmm13
  10819. vpxor xmm12, xmm12, xmm14
  10820. vpsrldq xmm13, xmm12, 4
  10821. vpslldq xmm12, xmm12, 12
  10822. vpxor xmm8, xmm8, xmm12
  10823. vpsrld xmm14, xmm8, 1
  10824. vpsrld xmm10, xmm8, 2
  10825. vpsrld xmm9, xmm8, 7
  10826. vpxor xmm14, xmm14, xmm10
  10827. vpxor xmm14, xmm14, xmm9
  10828. vpxor xmm14, xmm14, xmm13
  10829. vpxor xmm14, xmm14, xmm8
  10830. vpxor xmm7, xmm7, xmm14
  10831. vmovdqu OWORD PTR [rsp+64], xmm7
  10832. ; H ^ 6
  10833. vpclmulqdq xmm8, xmm1, xmm1, 0
  10834. vpclmulqdq xmm7, xmm1, xmm1, 17
  10835. vpslld xmm12, xmm8, 31
  10836. vpslld xmm13, xmm8, 30
  10837. vpslld xmm14, xmm8, 25
  10838. vpxor xmm12, xmm12, xmm13
  10839. vpxor xmm12, xmm12, xmm14
  10840. vpsrldq xmm13, xmm12, 4
  10841. vpslldq xmm12, xmm12, 12
  10842. vpxor xmm8, xmm8, xmm12
  10843. vpsrld xmm14, xmm8, 1
  10844. vpsrld xmm10, xmm8, 2
  10845. vpsrld xmm9, xmm8, 7
  10846. vpxor xmm14, xmm14, xmm10
  10847. vpxor xmm14, xmm14, xmm9
  10848. vpxor xmm14, xmm14, xmm13
  10849. vpxor xmm14, xmm14, xmm8
  10850. vpxor xmm7, xmm7, xmm14
  10851. vmovdqu OWORD PTR [rsp+80], xmm7
  10852. ; H ^ 7
  10853. ; ghash_gfmul_red_avx
  10854. vpshufd xmm9, xmm1, 78
  10855. vpshufd xmm10, xmm3, 78
  10856. vpclmulqdq xmm11, xmm3, xmm1, 17
  10857. vpclmulqdq xmm8, xmm3, xmm1, 0
  10858. vpxor xmm9, xmm9, xmm1
  10859. vpxor xmm10, xmm10, xmm3
  10860. vpclmulqdq xmm9, xmm9, xmm10, 0
  10861. vpxor xmm9, xmm9, xmm8
  10862. vpxor xmm9, xmm9, xmm11
  10863. vpslldq xmm10, xmm9, 8
  10864. vpsrldq xmm9, xmm9, 8
  10865. vpxor xmm8, xmm8, xmm10
  10866. vpxor xmm7, xmm11, xmm9
  10867. vpslld xmm12, xmm8, 31
  10868. vpslld xmm13, xmm8, 30
  10869. vpslld xmm14, xmm8, 25
  10870. vpxor xmm12, xmm12, xmm13
  10871. vpxor xmm12, xmm12, xmm14
  10872. vpsrldq xmm13, xmm12, 4
  10873. vpslldq xmm12, xmm12, 12
  10874. vpxor xmm8, xmm8, xmm12
  10875. vpsrld xmm14, xmm8, 1
  10876. vpsrld xmm10, xmm8, 2
  10877. vpsrld xmm9, xmm8, 7
  10878. vpxor xmm14, xmm14, xmm10
  10879. vpxor xmm14, xmm14, xmm9
  10880. vpxor xmm14, xmm14, xmm13
  10881. vpxor xmm14, xmm14, xmm8
  10882. vpxor xmm7, xmm7, xmm14
  10883. vmovdqu OWORD PTR [rsp+96], xmm7
  10884. ; H ^ 8
  10885. vpclmulqdq xmm8, xmm3, xmm3, 0
  10886. vpclmulqdq xmm7, xmm3, xmm3, 17
  10887. vpslld xmm12, xmm8, 31
  10888. vpslld xmm13, xmm8, 30
  10889. vpslld xmm14, xmm8, 25
  10890. vpxor xmm12, xmm12, xmm13
  10891. vpxor xmm12, xmm12, xmm14
  10892. vpsrldq xmm13, xmm12, 4
  10893. vpslldq xmm12, xmm12, 12
  10894. vpxor xmm8, xmm8, xmm12
  10895. vpsrld xmm14, xmm8, 1
  10896. vpsrld xmm10, xmm8, 2
  10897. vpsrld xmm9, xmm8, 7
  10898. vpxor xmm14, xmm14, xmm10
  10899. vpxor xmm14, xmm14, xmm9
  10900. vpxor xmm14, xmm14, xmm13
  10901. vpxor xmm14, xmm14, xmm8
  10902. vpxor xmm7, xmm7, xmm14
  10903. vmovdqu OWORD PTR [rsp+112], xmm7
  10904. L_AES_GCM_decrypt_update_avx1_ghash_128:
  10905. lea rcx, QWORD PTR [r11+rdi]
  10906. lea rdx, QWORD PTR [r10+rdi]
  10907. vmovdqu xmm0, OWORD PTR [r15]
  10908. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  10909. vpshufb xmm8, xmm0, xmm1
  10910. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  10911. vpshufb xmm9, xmm9, xmm1
  10912. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  10913. vpshufb xmm10, xmm10, xmm1
  10914. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  10915. vpshufb xmm11, xmm11, xmm1
  10916. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  10917. vpshufb xmm12, xmm12, xmm1
  10918. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  10919. vpshufb xmm13, xmm13, xmm1
  10920. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  10921. vpshufb xmm14, xmm14, xmm1
  10922. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  10923. vpshufb xmm15, xmm15, xmm1
  10924. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  10925. vmovdqa xmm7, OWORD PTR [rax]
  10926. vmovdqu OWORD PTR [r15], xmm0
  10927. vpxor xmm8, xmm8, xmm7
  10928. vpxor xmm9, xmm9, xmm7
  10929. vpxor xmm10, xmm10, xmm7
  10930. vpxor xmm11, xmm11, xmm7
  10931. vpxor xmm12, xmm12, xmm7
  10932. vpxor xmm13, xmm13, xmm7
  10933. vpxor xmm14, xmm14, xmm7
  10934. vpxor xmm15, xmm15, xmm7
  10935. vmovdqu xmm7, OWORD PTR [rsp+112]
  10936. vmovdqu xmm0, OWORD PTR [rcx]
  10937. vaesenc xmm8, xmm8, [rax+16]
  10938. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10939. vpxor xmm0, xmm0, xmm2
  10940. vpshufd xmm1, xmm7, 78
  10941. vpshufd xmm5, xmm0, 78
  10942. vpxor xmm1, xmm1, xmm7
  10943. vpxor xmm5, xmm5, xmm0
  10944. vpclmulqdq xmm3, xmm0, xmm7, 17
  10945. vaesenc xmm9, xmm9, [rax+16]
  10946. vaesenc xmm10, xmm10, [rax+16]
  10947. vpclmulqdq xmm2, xmm0, xmm7, 0
  10948. vaesenc xmm11, xmm11, [rax+16]
  10949. vaesenc xmm12, xmm12, [rax+16]
  10950. vpclmulqdq xmm1, xmm1, xmm5, 0
  10951. vaesenc xmm13, xmm13, [rax+16]
  10952. vaesenc xmm14, xmm14, [rax+16]
  10953. vaesenc xmm15, xmm15, [rax+16]
  10954. vpxor xmm1, xmm1, xmm2
  10955. vpxor xmm1, xmm1, xmm3
  10956. vmovdqu xmm7, OWORD PTR [rsp+96]
  10957. vmovdqu xmm0, OWORD PTR [rcx+16]
  10958. vpshufd xmm4, xmm7, 78
  10959. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10960. vaesenc xmm8, xmm8, [rax+32]
  10961. vpxor xmm4, xmm4, xmm7
  10962. vpshufd xmm5, xmm0, 78
  10963. vpxor xmm5, xmm5, xmm0
  10964. vpclmulqdq xmm6, xmm0, xmm7, 17
  10965. vaesenc xmm9, xmm9, [rax+32]
  10966. vaesenc xmm10, xmm10, [rax+32]
  10967. vpclmulqdq xmm7, xmm0, xmm7, 0
  10968. vaesenc xmm11, xmm11, [rax+32]
  10969. vaesenc xmm12, xmm12, [rax+32]
  10970. vpclmulqdq xmm4, xmm4, xmm5, 0
  10971. vaesenc xmm13, xmm13, [rax+32]
  10972. vaesenc xmm14, xmm14, [rax+32]
  10973. vaesenc xmm15, xmm15, [rax+32]
  10974. vpxor xmm1, xmm1, xmm7
  10975. vpxor xmm2, xmm2, xmm7
  10976. vpxor xmm1, xmm1, xmm6
  10977. vpxor xmm3, xmm3, xmm6
  10978. vpxor xmm1, xmm1, xmm4
  10979. vmovdqu xmm7, OWORD PTR [rsp+80]
  10980. vmovdqu xmm0, OWORD PTR [rcx+32]
  10981. vpshufd xmm4, xmm7, 78
  10982. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10983. vaesenc xmm8, xmm8, [rax+48]
  10984. vpxor xmm4, xmm4, xmm7
  10985. vpshufd xmm5, xmm0, 78
  10986. vpxor xmm5, xmm5, xmm0
  10987. vpclmulqdq xmm6, xmm0, xmm7, 17
  10988. vaesenc xmm9, xmm9, [rax+48]
  10989. vaesenc xmm10, xmm10, [rax+48]
  10990. vpclmulqdq xmm7, xmm0, xmm7, 0
  10991. vaesenc xmm11, xmm11, [rax+48]
  10992. vaesenc xmm12, xmm12, [rax+48]
  10993. vpclmulqdq xmm4, xmm4, xmm5, 0
  10994. vaesenc xmm13, xmm13, [rax+48]
  10995. vaesenc xmm14, xmm14, [rax+48]
  10996. vaesenc xmm15, xmm15, [rax+48]
  10997. vpxor xmm1, xmm1, xmm7
  10998. vpxor xmm2, xmm2, xmm7
  10999. vpxor xmm1, xmm1, xmm6
  11000. vpxor xmm3, xmm3, xmm6
  11001. vpxor xmm1, xmm1, xmm4
  11002. vmovdqu xmm7, OWORD PTR [rsp+64]
  11003. vmovdqu xmm0, OWORD PTR [rcx+48]
  11004. vpshufd xmm4, xmm7, 78
  11005. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  11006. vaesenc xmm8, xmm8, [rax+64]
  11007. vpxor xmm4, xmm4, xmm7
  11008. vpshufd xmm5, xmm0, 78
  11009. vpxor xmm5, xmm5, xmm0
  11010. vpclmulqdq xmm6, xmm0, xmm7, 17
  11011. vaesenc xmm9, xmm9, [rax+64]
  11012. vaesenc xmm10, xmm10, [rax+64]
  11013. vpclmulqdq xmm7, xmm0, xmm7, 0
  11014. vaesenc xmm11, xmm11, [rax+64]
  11015. vaesenc xmm12, xmm12, [rax+64]
  11016. vpclmulqdq xmm4, xmm4, xmm5, 0
  11017. vaesenc xmm13, xmm13, [rax+64]
  11018. vaesenc xmm14, xmm14, [rax+64]
  11019. vaesenc xmm15, xmm15, [rax+64]
  11020. vpxor xmm1, xmm1, xmm7
  11021. vpxor xmm2, xmm2, xmm7
  11022. vpxor xmm1, xmm1, xmm6
  11023. vpxor xmm3, xmm3, xmm6
  11024. vpxor xmm1, xmm1, xmm4
  11025. vmovdqu xmm7, OWORD PTR [rsp+48]
  11026. vmovdqu xmm0, OWORD PTR [rcx+64]
  11027. vpshufd xmm4, xmm7, 78
  11028. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  11029. vaesenc xmm8, xmm8, [rax+80]
  11030. vpxor xmm4, xmm4, xmm7
  11031. vpshufd xmm5, xmm0, 78
  11032. vpxor xmm5, xmm5, xmm0
  11033. vpclmulqdq xmm6, xmm0, xmm7, 17
  11034. vaesenc xmm9, xmm9, [rax+80]
  11035. vaesenc xmm10, xmm10, [rax+80]
  11036. vpclmulqdq xmm7, xmm0, xmm7, 0
  11037. vaesenc xmm11, xmm11, [rax+80]
  11038. vaesenc xmm12, xmm12, [rax+80]
  11039. vpclmulqdq xmm4, xmm4, xmm5, 0
  11040. vaesenc xmm13, xmm13, [rax+80]
  11041. vaesenc xmm14, xmm14, [rax+80]
  11042. vaesenc xmm15, xmm15, [rax+80]
  11043. vpxor xmm1, xmm1, xmm7
  11044. vpxor xmm2, xmm2, xmm7
  11045. vpxor xmm1, xmm1, xmm6
  11046. vpxor xmm3, xmm3, xmm6
  11047. vpxor xmm1, xmm1, xmm4
  11048. vmovdqu xmm7, OWORD PTR [rsp+32]
  11049. vmovdqu xmm0, OWORD PTR [rcx+80]
  11050. vpshufd xmm4, xmm7, 78
  11051. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  11052. vaesenc xmm8, xmm8, [rax+96]
  11053. vpxor xmm4, xmm4, xmm7
  11054. vpshufd xmm5, xmm0, 78
  11055. vpxor xmm5, xmm5, xmm0
  11056. vpclmulqdq xmm6, xmm0, xmm7, 17
  11057. vaesenc xmm9, xmm9, [rax+96]
  11058. vaesenc xmm10, xmm10, [rax+96]
  11059. vpclmulqdq xmm7, xmm0, xmm7, 0
  11060. vaesenc xmm11, xmm11, [rax+96]
  11061. vaesenc xmm12, xmm12, [rax+96]
  11062. vpclmulqdq xmm4, xmm4, xmm5, 0
  11063. vaesenc xmm13, xmm13, [rax+96]
  11064. vaesenc xmm14, xmm14, [rax+96]
  11065. vaesenc xmm15, xmm15, [rax+96]
  11066. vpxor xmm1, xmm1, xmm7
  11067. vpxor xmm2, xmm2, xmm7
  11068. vpxor xmm1, xmm1, xmm6
  11069. vpxor xmm3, xmm3, xmm6
  11070. vpxor xmm1, xmm1, xmm4
  11071. vmovdqu xmm7, OWORD PTR [rsp+16]
  11072. vmovdqu xmm0, OWORD PTR [rcx+96]
  11073. vpshufd xmm4, xmm7, 78
  11074. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  11075. vaesenc xmm8, xmm8, [rax+112]
  11076. vpxor xmm4, xmm4, xmm7
  11077. vpshufd xmm5, xmm0, 78
  11078. vpxor xmm5, xmm5, xmm0
  11079. vpclmulqdq xmm6, xmm0, xmm7, 17
  11080. vaesenc xmm9, xmm9, [rax+112]
  11081. vaesenc xmm10, xmm10, [rax+112]
  11082. vpclmulqdq xmm7, xmm0, xmm7, 0
  11083. vaesenc xmm11, xmm11, [rax+112]
  11084. vaesenc xmm12, xmm12, [rax+112]
  11085. vpclmulqdq xmm4, xmm4, xmm5, 0
  11086. vaesenc xmm13, xmm13, [rax+112]
  11087. vaesenc xmm14, xmm14, [rax+112]
  11088. vaesenc xmm15, xmm15, [rax+112]
  11089. vpxor xmm1, xmm1, xmm7
  11090. vpxor xmm2, xmm2, xmm7
  11091. vpxor xmm1, xmm1, xmm6
  11092. vpxor xmm3, xmm3, xmm6
  11093. vpxor xmm1, xmm1, xmm4
  11094. vmovdqu xmm7, OWORD PTR [rsp]
  11095. vmovdqu xmm0, OWORD PTR [rcx+112]
  11096. vpshufd xmm4, xmm7, 78
  11097. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  11098. vaesenc xmm8, xmm8, [rax+128]
  11099. vpxor xmm4, xmm4, xmm7
  11100. vpshufd xmm5, xmm0, 78
  11101. vpxor xmm5, xmm5, xmm0
  11102. vpclmulqdq xmm6, xmm0, xmm7, 17
  11103. vaesenc xmm9, xmm9, [rax+128]
  11104. vaesenc xmm10, xmm10, [rax+128]
  11105. vpclmulqdq xmm7, xmm0, xmm7, 0
  11106. vaesenc xmm11, xmm11, [rax+128]
  11107. vaesenc xmm12, xmm12, [rax+128]
  11108. vpclmulqdq xmm4, xmm4, xmm5, 0
  11109. vaesenc xmm13, xmm13, [rax+128]
  11110. vaesenc xmm14, xmm14, [rax+128]
  11111. vaesenc xmm15, xmm15, [rax+128]
  11112. vpxor xmm1, xmm1, xmm7
  11113. vpxor xmm2, xmm2, xmm7
  11114. vpxor xmm1, xmm1, xmm6
  11115. vpxor xmm3, xmm3, xmm6
  11116. vpxor xmm1, xmm1, xmm4
  11117. vpslldq xmm5, xmm1, 8
  11118. vpsrldq xmm1, xmm1, 8
  11119. vaesenc xmm8, xmm8, [rax+144]
  11120. vpxor xmm2, xmm2, xmm5
  11121. vpxor xmm3, xmm3, xmm1
  11122. vaesenc xmm9, xmm9, [rax+144]
  11123. vpslld xmm7, xmm2, 31
  11124. vpslld xmm4, xmm2, 30
  11125. vpslld xmm5, xmm2, 25
  11126. vaesenc xmm10, xmm10, [rax+144]
  11127. vpxor xmm7, xmm7, xmm4
  11128. vpxor xmm7, xmm7, xmm5
  11129. vaesenc xmm11, xmm11, [rax+144]
  11130. vpsrldq xmm4, xmm7, 4
  11131. vpslldq xmm7, xmm7, 12
  11132. vaesenc xmm12, xmm12, [rax+144]
  11133. vpxor xmm2, xmm2, xmm7
  11134. vpsrld xmm5, xmm2, 1
  11135. vaesenc xmm13, xmm13, [rax+144]
  11136. vpsrld xmm1, xmm2, 2
  11137. vpsrld xmm0, xmm2, 7
  11138. vaesenc xmm14, xmm14, [rax+144]
  11139. vpxor xmm5, xmm5, xmm1
  11140. vpxor xmm5, xmm5, xmm0
  11141. vaesenc xmm15, xmm15, [rax+144]
  11142. vpxor xmm5, xmm5, xmm4
  11143. vpxor xmm2, xmm2, xmm5
  11144. vpxor xmm2, xmm2, xmm3
  11145. cmp r8d, 11
  11146. vmovdqa xmm7, OWORD PTR [rax+160]
  11147. jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done
  11148. vaesenc xmm8, xmm8, xmm7
  11149. vaesenc xmm9, xmm9, xmm7
  11150. vaesenc xmm10, xmm10, xmm7
  11151. vaesenc xmm11, xmm11, xmm7
  11152. vaesenc xmm12, xmm12, xmm7
  11153. vaesenc xmm13, xmm13, xmm7
  11154. vaesenc xmm14, xmm14, xmm7
  11155. vaesenc xmm15, xmm15, xmm7
  11156. vmovdqa xmm7, OWORD PTR [rax+176]
  11157. vaesenc xmm8, xmm8, xmm7
  11158. vaesenc xmm9, xmm9, xmm7
  11159. vaesenc xmm10, xmm10, xmm7
  11160. vaesenc xmm11, xmm11, xmm7
  11161. vaesenc xmm12, xmm12, xmm7
  11162. vaesenc xmm13, xmm13, xmm7
  11163. vaesenc xmm14, xmm14, xmm7
  11164. vaesenc xmm15, xmm15, xmm7
  11165. cmp r8d, 13
  11166. vmovdqa xmm7, OWORD PTR [rax+192]
  11167. jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done
  11168. vaesenc xmm8, xmm8, xmm7
  11169. vaesenc xmm9, xmm9, xmm7
  11170. vaesenc xmm10, xmm10, xmm7
  11171. vaesenc xmm11, xmm11, xmm7
  11172. vaesenc xmm12, xmm12, xmm7
  11173. vaesenc xmm13, xmm13, xmm7
  11174. vaesenc xmm14, xmm14, xmm7
  11175. vaesenc xmm15, xmm15, xmm7
  11176. vmovdqa xmm7, OWORD PTR [rax+208]
  11177. vaesenc xmm8, xmm8, xmm7
  11178. vaesenc xmm9, xmm9, xmm7
  11179. vaesenc xmm10, xmm10, xmm7
  11180. vaesenc xmm11, xmm11, xmm7
  11181. vaesenc xmm12, xmm12, xmm7
  11182. vaesenc xmm13, xmm13, xmm7
  11183. vaesenc xmm14, xmm14, xmm7
  11184. vaesenc xmm15, xmm15, xmm7
  11185. vmovdqa xmm7, OWORD PTR [rax+224]
  11186. L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done:
  11187. vaesenclast xmm8, xmm8, xmm7
  11188. vaesenclast xmm9, xmm9, xmm7
  11189. vmovdqu xmm0, OWORD PTR [rcx]
  11190. vmovdqu xmm1, OWORD PTR [rcx+16]
  11191. vpxor xmm8, xmm8, xmm0
  11192. vpxor xmm9, xmm9, xmm1
  11193. vmovdqu OWORD PTR [rdx], xmm8
  11194. vmovdqu OWORD PTR [rdx+16], xmm9
  11195. vaesenclast xmm10, xmm10, xmm7
  11196. vaesenclast xmm11, xmm11, xmm7
  11197. vmovdqu xmm0, OWORD PTR [rcx+32]
  11198. vmovdqu xmm1, OWORD PTR [rcx+48]
  11199. vpxor xmm10, xmm10, xmm0
  11200. vpxor xmm11, xmm11, xmm1
  11201. vmovdqu OWORD PTR [rdx+32], xmm10
  11202. vmovdqu OWORD PTR [rdx+48], xmm11
  11203. vaesenclast xmm12, xmm12, xmm7
  11204. vaesenclast xmm13, xmm13, xmm7
  11205. vmovdqu xmm0, OWORD PTR [rcx+64]
  11206. vmovdqu xmm1, OWORD PTR [rcx+80]
  11207. vpxor xmm12, xmm12, xmm0
  11208. vpxor xmm13, xmm13, xmm1
  11209. vmovdqu OWORD PTR [rdx+64], xmm12
  11210. vmovdqu OWORD PTR [rdx+80], xmm13
  11211. vaesenclast xmm14, xmm14, xmm7
  11212. vaesenclast xmm15, xmm15, xmm7
  11213. vmovdqu xmm0, OWORD PTR [rcx+96]
  11214. vmovdqu xmm1, OWORD PTR [rcx+112]
  11215. vpxor xmm14, xmm14, xmm0
  11216. vpxor xmm15, xmm15, xmm1
  11217. vmovdqu OWORD PTR [rdx+96], xmm14
  11218. vmovdqu OWORD PTR [rdx+112], xmm15
  11219. add edi, 128
  11220. cmp edi, r13d
  11221. jl L_AES_GCM_decrypt_update_avx1_ghash_128
  11222. vmovdqa xmm6, xmm2
  11223. vmovdqu xmm5, OWORD PTR [rsp]
  11224. L_AES_GCM_decrypt_update_avx1_done_128:
  11225. mov edx, r9d
  11226. cmp edi, edx
  11227. jge L_AES_GCM_decrypt_update_avx1_done_dec
  11228. mov r13d, r9d
  11229. and r13d, 4294967280
  11230. cmp edi, r13d
  11231. jge L_AES_GCM_decrypt_update_avx1_last_block_done
  11232. L_AES_GCM_decrypt_update_avx1_last_block_start:
  11233. vmovdqu xmm13, OWORD PTR [r11+rdi]
  11234. vmovdqa xmm0, xmm5
  11235. vpshufb xmm1, xmm13, OWORD PTR L_avx1_aes_gcm_bswap_mask
  11236. vpxor xmm1, xmm1, xmm6
  11237. vmovdqu xmm9, OWORD PTR [r15]
  11238. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  11239. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  11240. vmovdqu OWORD PTR [r15], xmm9
  11241. vpxor xmm8, xmm8, [rax]
  11242. vpclmulqdq xmm10, xmm1, xmm0, 16
  11243. vaesenc xmm8, xmm8, [rax+16]
  11244. vaesenc xmm8, xmm8, [rax+32]
  11245. vpclmulqdq xmm11, xmm1, xmm0, 1
  11246. vaesenc xmm8, xmm8, [rax+48]
  11247. vaesenc xmm8, xmm8, [rax+64]
  11248. vpclmulqdq xmm12, xmm1, xmm0, 0
  11249. vaesenc xmm8, xmm8, [rax+80]
  11250. vpclmulqdq xmm1, xmm1, xmm0, 17
  11251. vaesenc xmm8, xmm8, [rax+96]
  11252. vpxor xmm10, xmm10, xmm11
  11253. vpslldq xmm2, xmm10, 8
  11254. vpsrldq xmm10, xmm10, 8
  11255. vaesenc xmm8, xmm8, [rax+112]
  11256. vpxor xmm2, xmm2, xmm12
  11257. vpxor xmm3, xmm1, xmm10
  11258. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  11259. vpclmulqdq xmm11, xmm2, xmm0, 16
  11260. vaesenc xmm8, xmm8, [rax+128]
  11261. vpshufd xmm10, xmm2, 78
  11262. vpxor xmm10, xmm10, xmm11
  11263. vpclmulqdq xmm11, xmm10, xmm0, 16
  11264. vaesenc xmm8, xmm8, [rax+144]
  11265. vpshufd xmm10, xmm10, 78
  11266. vpxor xmm10, xmm10, xmm11
  11267. vpxor xmm6, xmm10, xmm3
  11268. cmp r8d, 11
  11269. vmovdqa xmm9, OWORD PTR [rax+160]
  11270. jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last
  11271. vaesenc xmm8, xmm8, xmm9
  11272. vaesenc xmm8, xmm8, [rax+176]
  11273. cmp r8d, 13
  11274. vmovdqa xmm9, OWORD PTR [rax+192]
  11275. jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last
  11276. vaesenc xmm8, xmm8, xmm9
  11277. vaesenc xmm8, xmm8, [rax+208]
  11278. vmovdqa xmm9, OWORD PTR [rax+224]
  11279. L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last:
  11280. vaesenclast xmm8, xmm8, xmm9
  11281. vmovdqa xmm0, xmm13
  11282. vpxor xmm8, xmm8, xmm0
  11283. vmovdqu OWORD PTR [r10+rdi], xmm8
  11284. add edi, 16
  11285. cmp edi, r13d
  11286. jl L_AES_GCM_decrypt_update_avx1_last_block_start
  11287. L_AES_GCM_decrypt_update_avx1_last_block_done:
  11288. L_AES_GCM_decrypt_update_avx1_done_dec:
  11289. vmovdqa OWORD PTR [r12], xmm6
  11290. vzeroupper
  11291. vmovdqu xmm6, OWORD PTR [rsp+168]
  11292. vmovdqu xmm7, OWORD PTR [rsp+184]
  11293. vmovdqu xmm8, OWORD PTR [rsp+200]
  11294. vmovdqu xmm9, OWORD PTR [rsp+216]
  11295. vmovdqu xmm10, OWORD PTR [rsp+232]
  11296. vmovdqu xmm11, OWORD PTR [rsp+248]
  11297. vmovdqu xmm12, OWORD PTR [rsp+264]
  11298. vmovdqu xmm13, OWORD PTR [rsp+280]
  11299. vmovdqu xmm14, OWORD PTR [rsp+296]
  11300. vmovdqu xmm15, OWORD PTR [rsp+312]
  11301. add rsp, 328
  11302. pop rdi
  11303. pop r15
  11304. pop r14
  11305. pop r12
  11306. pop r13
  11307. ret
  11308. AES_GCM_decrypt_update_avx1 ENDP
  11309. _text ENDS
  ;-----------------------------------------------------------------------
  ; AES_GCM_decrypt_final_avx1
  ;
  ; Folds the length block into the running GHASH state, multiplies by the
  ; hash key, XORs the result with a caller-supplied 16-byte block to get
  ; the computed tag, and compares that tag with the one supplied by the
  ; caller.
  ;
  ; ABI:   Microsoft x64 (stack arguments are read above the 32-byte shadow
  ;        space; xmm6-xmm13 and xmm15 are saved and restored).
  ; In:    rcx       -> 16-byte running GHASH state
  ;        rdx       -> tag to verify
  ;        r8d       =  number of tag bytes to compare
  ;        r9d       =  byte count placed (as a bit count) in the low qword
  ;                     of the length block
  ;                     (presumably the ciphertext length - TODO confirm)
  ;        [rsp+40]  =  dword byte count placed (as a bit count) in the high
  ;                     qword (presumably the AAD length - TODO confirm)
  ;        [rsp+48]  -> 16-byte hash key H
  ;        [rsp+56]  -> 16-byte block XORed over the final hash
  ;                     (presumably the encrypted initial counter - confirm)
  ;        [rsp+64]  -> int result
  ;        (stack offsets are relative to rsp at function entry)
  ; Out:   result = 1 when every compared byte matches, otherwise 0.
  ; Clobb: rax, rcx, rdx, r9, r10, r11, xmm0, xmm1, xmm5, flags
  ;
  ; The three 16-byte inputs are read with vmovdqu, so the pointers do not
  ; have to be 16-byte aligned.
  ;
  ; NOTE(review): the byte-compare loop tests its exit condition at the
  ; bottom, so r8d == 0 is not handled, and r8d > 16 would read past the
  ; 16-byte computed tag. Presumably callers validate the tag length
  ; beforehand - confirm.
  ;-----------------------------------------------------------------------
  _text SEGMENT READONLY PARA
  AES_GCM_decrypt_final_avx1 PROC
          ; Save the non-volatile general-purpose registers used below.
          push    r13
          push    r12
          push    r14
          push    rbp
          push    r15
          ; Move the register arguments out of the way and fetch the stack
          ; arguments (5 pushes + return address + 32-byte shadow = 80).
          mov     rax, rcx                        ; rax -> GHASH state
          mov     r10d, r9d                       ; r10d = byte count for low qword
          mov     r9, rdx                         ; r9  -> tag to verify
          mov     r11d, DWORD PTR [rsp+80]        ; r11d = byte count for high qword
          mov     r12, QWORD PTR [rsp+88]         ; r12 -> H
          mov     r14, QWORD PTR [rsp+96]         ; r14 -> block XORed over the hash
          mov     rbp, QWORD PTR [rsp+104]        ; rbp -> result
          ; 160-byte frame: [rsp+16 .. rsp+159] holds the saved xmm registers.
          sub     rsp, 160
          vmovdqu OWORD PTR [rsp+16], xmm6
          vmovdqu OWORD PTR [rsp+32], xmm7
          vmovdqu OWORD PTR [rsp+48], xmm8
          vmovdqu OWORD PTR [rsp+64], xmm9
          vmovdqu OWORD PTR [rsp+80], xmm10
          vmovdqu OWORD PTR [rsp+96], xmm11
          vmovdqu OWORD PTR [rsp+112], xmm12
          vmovdqu OWORD PTR [rsp+128], xmm13
          vmovdqu OWORD PTR [rsp+144], xmm15
          ; Unaligned loads: no alignment requirement on the caller's pointers.
          vmovdqu xmm6, OWORD PTR [rax]           ; xmm6  = GHASH state
          vmovdqu xmm5, OWORD PTR [r12]           ; xmm5  = H
          vmovdqu xmm15, OWORD PTR [r14]          ; xmm15 = block XORed in at the end
          ; xmm5 = H shifted left by one bit across all 128 bits; when the bit
          ; shifted out was set, L_avx1_aes_gcm_mod2_128 is XORed in.
          vpsrlq  xmm8, xmm5, 63                  ; top bit of each qword
          vpsllq  xmm7, xmm5, 1
          vpslldq xmm8, xmm8, 8                   ; carry low qword -> high qword
          vpor    xmm7, xmm7, xmm8
          vpshufd xmm5, xmm5, 255                 ; broadcast the top dword
          vpsrad  xmm5, xmm5, 31                  ; all-ones when the top bit was set
          vpand   xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
          vpxor   xmm5, xmm5, xmm7
          ; Length block: bit counts (bytes * 8), low qword from r10d and high
          ; qword from r11d, XORed into the GHASH state.
          mov     edx, r10d
          mov     ecx, r11d
          shl     rdx, 3
          shl     rcx, 3
          vmovq   xmm0, rdx
          vmovq   xmm1, rcx
          vpunpcklqdq xmm0, xmm0, xmm1
          vpxor   xmm6, xmm6, xmm0
          ; ghash_gfmul_red_avx
          ; Carry-less multiply xmm6 * xmm5 from three vpclmulqdq products
          ; (high*high, low*low, and the product of the XORed halves) ...
          vpshufd xmm8, xmm5, 78
          vpshufd xmm9, xmm6, 78
          vpclmulqdq xmm10, xmm6, xmm5, 17        ; high * high
          vpclmulqdq xmm7, xmm6, xmm5, 0          ; low * low
          vpxor   xmm8, xmm8, xmm5
          vpxor   xmm9, xmm9, xmm6
          vpclmulqdq xmm8, xmm8, xmm9, 0          ; (a_lo ^ a_hi) * (b_lo ^ b_hi)
          vpxor   xmm8, xmm8, xmm7
          vpxor   xmm8, xmm8, xmm10               ; xmm8 = middle term
          vpslldq xmm9, xmm8, 8
          vpsrldq xmm8, xmm8, 8
          vpxor   xmm7, xmm7, xmm9                ; xmm7 = low 128 bits of the product
          vpxor   xmm6, xmm10, xmm8               ; xmm6 = high 128 bits of the product
          ; ... then fold the low half into the high half with the shift/XOR
          ; reduction (left shifts 31/30/25, right shifts 1/2/7).
          vpslld  xmm11, xmm7, 31
          vpslld  xmm12, xmm7, 30
          vpslld  xmm13, xmm7, 25
          vpxor   xmm11, xmm11, xmm12
          vpxor   xmm11, xmm11, xmm13
          vpsrldq xmm12, xmm11, 4
          vpslldq xmm11, xmm11, 12
          vpxor   xmm7, xmm7, xmm11
          vpsrld  xmm13, xmm7, 1
          vpsrld  xmm9, xmm7, 2
          vpsrld  xmm8, xmm7, 7
          vpxor   xmm13, xmm13, xmm9
          vpxor   xmm13, xmm13, xmm8
          vpxor   xmm13, xmm13, xmm12
          vpxor   xmm13, xmm13, xmm7
          vpxor   xmm6, xmm6, xmm13               ; xmm6 = reduced product
          ; Reverse the byte order and XOR with the caller's block:
          ; xmm0 = computed tag.
          vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vpxor   xmm0, xmm6, xmm15
          cmp     r8d, 16
          je      L_AES_GCM_decrypt_final_avx1_cmp_tag_16
          ; Tag length other than 16: spill the computed tag and compare r8d
          ; bytes. Differences are OR-accumulated in r15b and the loop has no
          ; early exit, so every requested byte is always examined.
          sub     rsp, 16
          xor     ecx, ecx                        ; rcx = byte index
          xor     r15d, r15d                      ; r15 = accumulated difference
          vmovdqu OWORD PTR [rsp], xmm0
  L_AES_GCM_decrypt_final_avx1_cmp_tag_loop:
          movzx   r13d, BYTE PTR [rsp+rcx]
          xor     r13b, BYTE PTR [r9+rcx]
          or      r15b, r13b
          inc     ecx
          cmp     ecx, r8d
          jne     L_AES_GCM_decrypt_final_avx1_cmp_tag_loop
          test    r15, r15
          sete    r15b                            ; r15 = 1 when no byte differed
          add     rsp, 16
          jmp     L_AES_GCM_decrypt_final_avx1_cmp_tag_done
  L_AES_GCM_decrypt_final_avx1_cmp_tag_16:
          ; Full 16-byte tag: one vector compare.
          vmovdqu xmm1, OWORD PTR [r9]
          vpcmpeqb xmm0, xmm0, xmm1
          vpmovmskb rdx, xmm0
          ; edx == 0FFFFh (all 16 bytes equal): result 1, otherwise result 0.
          xor     r15d, r15d
          cmp     edx, 65535
          sete    r15b
  L_AES_GCM_decrypt_final_avx1_cmp_tag_done:
          mov     DWORD PTR [rbp], r15d           ; store the result
          vzeroupper
          ; Restore the saved xmm registers and unwind the frame.
          vmovdqu xmm6, OWORD PTR [rsp+16]
          vmovdqu xmm7, OWORD PTR [rsp+32]
          vmovdqu xmm8, OWORD PTR [rsp+48]
          vmovdqu xmm9, OWORD PTR [rsp+64]
          vmovdqu xmm10, OWORD PTR [rsp+80]
          vmovdqu xmm11, OWORD PTR [rsp+96]
          vmovdqu xmm12, OWORD PTR [rsp+112]
          vmovdqu xmm13, OWORD PTR [rsp+128]
          vmovdqu xmm15, OWORD PTR [rsp+144]
          add     rsp, 160
          pop     r15
          pop     rbp
          pop     r14
          pop     r12
          pop     r13
          ret
  AES_GCM_decrypt_final_avx1 ENDP
  _text ENDS
  11432. ENDIF
  11433. IFDEF HAVE_INTEL_AVX2
  ;-----------------------------------------------------------------------
  ; Constant pool for the AVX2 AES-GCM routines.
  ;
  ; Each constant is two QWORDs (16 bytes, first QWORD at the lower
  ; address) placed on a 16-byte boundary, and is followed by a ptr_*
  ; QWORD holding the constant's own address. The ptr_* entries are not
  ; referenced in the visible part of this file.
  ;-----------------------------------------------------------------------
  _DATA SEGMENT

  ; Counter increments 1..8: the value sits in the high QWORD. The code
  ; adds these with vpaddd to a counter block that has first been shuffled
  ; with L_avx2_aes_gcm_bswap_epi64.
  ALIGN 16
  L_avx2_aes_gcm_one              QWORD 0, 1
  ptr_L_avx2_aes_gcm_one          QWORD L_avx2_aes_gcm_one
  ALIGN 16
  L_avx2_aes_gcm_two              QWORD 0, 2
  ptr_L_avx2_aes_gcm_two          QWORD L_avx2_aes_gcm_two
  ALIGN 16
  L_avx2_aes_gcm_three            QWORD 0, 3
  ptr_L_avx2_aes_gcm_three        QWORD L_avx2_aes_gcm_three
  ALIGN 16
  L_avx2_aes_gcm_four             QWORD 0, 4
  ptr_L_avx2_aes_gcm_four         QWORD L_avx2_aes_gcm_four
  ALIGN 16
  L_avx2_aes_gcm_five             QWORD 0, 5
  ptr_L_avx2_aes_gcm_five         QWORD L_avx2_aes_gcm_five
  ALIGN 16
  L_avx2_aes_gcm_six              QWORD 0, 6
  ptr_L_avx2_aes_gcm_six          QWORD L_avx2_aes_gcm_six
  ALIGN 16
  L_avx2_aes_gcm_seven            QWORD 0, 7
  ptr_L_avx2_aes_gcm_seven        QWORD L_avx2_aes_gcm_seven
  ALIGN 16
  L_avx2_aes_gcm_eight            QWORD 0, 8
  ptr_L_avx2_aes_gcm_eight        QWORD L_avx2_aes_gcm_eight

  ; Only the last byte (offset 15) is 01h. The 12-byte-IV path blends the
  ; IV over the low three dwords of this value to build the counter block.
  ALIGN 16
  L_avx2_aes_gcm_bswap_one        QWORD 0, 0100000000000000h
  ptr_L_avx2_aes_gcm_bswap_one    QWORD L_avx2_aes_gcm_bswap_one

  ; vpshufb mask: reverses the byte order inside each 64-bit half.
  ALIGN 16
  L_avx2_aes_gcm_bswap_epi64      QWORD 0001020304050607h, 08090a0b0c0d0e0fh
  ptr_L_avx2_aes_gcm_bswap_epi64  QWORD L_avx2_aes_gcm_bswap_epi64

  ; vpshufb mask: reverses the byte order of the whole 16-byte value.
  ALIGN 16
  L_avx2_aes_gcm_bswap_mask       QWORD 08090a0b0c0d0e0fh, 0001020304050607h
  ptr_L_avx2_aes_gcm_bswap_mask   QWORD L_avx2_aes_gcm_bswap_mask

  ; Constant used by the GHASH reduction steps (operand of vpclmulqdq and
  ; vpand in the code).
  ALIGN 16
  L_avx2_aes_gcm_mod2_128         QWORD 1, 0c200000000000000h
  ptr_L_avx2_aes_gcm_mod2_128     QWORD L_avx2_aes_gcm_mod2_128

  _DATA ENDS
  11494. _text SEGMENT READONLY PARA
  11495. AES_GCM_encrypt_avx2 PROC
  11496. push r13
  11497. push rdi
  11498. push r12
  11499. push r15
  11500. push rbx
  11501. push r14
  11502. push rsi
  11503. mov rdi, rcx
  11504. mov r12, r8
  11505. mov rax, r9
  11506. mov r15, QWORD PTR [rsp+96]
  11507. mov r8, rdx
  11508. mov r10d, DWORD PTR [rsp+104]
  11509. mov r11d, DWORD PTR [rsp+112]
  11510. mov ebx, DWORD PTR [rsp+120]
  11511. mov r14d, DWORD PTR [rsp+128]
  11512. mov rsi, QWORD PTR [rsp+136]
  11513. mov r9d, DWORD PTR [rsp+144]
  11514. sub rsp, 320
  11515. vmovdqu OWORD PTR [rsp+160], xmm6
  11516. vmovdqu OWORD PTR [rsp+176], xmm7
  11517. vmovdqu OWORD PTR [rsp+192], xmm8
  11518. vmovdqu OWORD PTR [rsp+208], xmm9
  11519. vmovdqu OWORD PTR [rsp+224], xmm10
  11520. vmovdqu OWORD PTR [rsp+240], xmm11
  11521. vmovdqu OWORD PTR [rsp+256], xmm12
  11522. vmovdqu OWORD PTR [rsp+272], xmm13
  11523. vmovdqu OWORD PTR [rsp+288], xmm14
  11524. vmovdqu OWORD PTR [rsp+304], xmm15
  11525. vpxor xmm4, xmm4, xmm4
  11526. vpxor xmm6, xmm6, xmm6
  11527. mov edx, ebx
  11528. cmp edx, 12
  11529. je L_AES_GCM_encrypt_avx2_iv_12
  11530. ; Calculate values when IV is not 12 bytes
  11531. ; H = Encrypt X(=0)
  11532. vmovdqu xmm5, OWORD PTR [rsi]
  11533. vaesenc xmm5, xmm5, [rsi+16]
  11534. vaesenc xmm5, xmm5, [rsi+32]
  11535. vaesenc xmm5, xmm5, [rsi+48]
  11536. vaesenc xmm5, xmm5, [rsi+64]
  11537. vaesenc xmm5, xmm5, [rsi+80]
  11538. vaesenc xmm5, xmm5, [rsi+96]
  11539. vaesenc xmm5, xmm5, [rsi+112]
  11540. vaesenc xmm5, xmm5, [rsi+128]
  11541. vaesenc xmm5, xmm5, [rsi+144]
  11542. cmp r9d, 11
  11543. vmovdqu xmm0, OWORD PTR [rsi+160]
  11544. jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
  11545. vaesenc xmm5, xmm5, xmm0
  11546. vaesenc xmm5, xmm5, [rsi+176]
  11547. cmp r9d, 13
  11548. vmovdqu xmm0, OWORD PTR [rsi+192]
  11549. jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
  11550. vaesenc xmm5, xmm5, xmm0
  11551. vaesenc xmm5, xmm5, [rsi+208]
  11552. vmovdqu xmm0, OWORD PTR [rsi+224]
  11553. L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last:
  11554. vaesenclast xmm5, xmm5, xmm0
  11555. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11556. ; Calc counter
  11557. ; Initialization vector
  11558. cmp edx, 0
  11559. mov rcx, 0
  11560. je L_AES_GCM_encrypt_avx2_calc_iv_done
  11561. cmp edx, 16
  11562. jl L_AES_GCM_encrypt_avx2_calc_iv_lt16
  11563. and edx, 4294967280
  11564. L_AES_GCM_encrypt_avx2_calc_iv_16_loop:
  11565. vmovdqu xmm0, OWORD PTR [rax+rcx]
  11566. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11567. vpxor xmm4, xmm4, xmm0
  11568. ; ghash_gfmul_avx
  11569. vpclmulqdq xmm2, xmm5, xmm4, 16
  11570. vpclmulqdq xmm1, xmm5, xmm4, 1
  11571. vpclmulqdq xmm0, xmm5, xmm4, 0
  11572. vpclmulqdq xmm3, xmm5, xmm4, 17
  11573. vpxor xmm2, xmm2, xmm1
  11574. vpslldq xmm1, xmm2, 8
  11575. vpsrldq xmm2, xmm2, 8
  11576. vpxor xmm7, xmm0, xmm1
  11577. vpxor xmm4, xmm3, xmm2
  11578. ; ghash_mid
  11579. vpsrld xmm0, xmm7, 31
  11580. vpsrld xmm1, xmm4, 31
  11581. vpslld xmm7, xmm7, 1
  11582. vpslld xmm4, xmm4, 1
  11583. vpsrldq xmm2, xmm0, 12
  11584. vpslldq xmm0, xmm0, 4
  11585. vpslldq xmm1, xmm1, 4
  11586. vpor xmm4, xmm4, xmm2
  11587. vpor xmm7, xmm7, xmm0
  11588. vpor xmm4, xmm4, xmm1
  11589. ; ghash_red
  11590. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11591. vpclmulqdq xmm0, xmm7, xmm2, 16
  11592. vpshufd xmm1, xmm7, 78
  11593. vpxor xmm1, xmm1, xmm0
  11594. vpclmulqdq xmm0, xmm1, xmm2, 16
  11595. vpshufd xmm1, xmm1, 78
  11596. vpxor xmm1, xmm1, xmm0
  11597. vpxor xmm4, xmm4, xmm1
  11598. add ecx, 16
  11599. cmp ecx, edx
  11600. jl L_AES_GCM_encrypt_avx2_calc_iv_16_loop
  11601. mov edx, ebx
  11602. cmp ecx, edx
  11603. je L_AES_GCM_encrypt_avx2_calc_iv_done
  11604. L_AES_GCM_encrypt_avx2_calc_iv_lt16:
  11605. vpxor xmm0, xmm0, xmm0
  11606. xor ebx, ebx
  11607. vmovdqu OWORD PTR [rsp], xmm0
  11608. L_AES_GCM_encrypt_avx2_calc_iv_loop:
  11609. movzx r13d, BYTE PTR [rax+rcx]
  11610. mov BYTE PTR [rsp+rbx], r13b
  11611. inc ecx
  11612. inc ebx
  11613. cmp ecx, edx
  11614. jl L_AES_GCM_encrypt_avx2_calc_iv_loop
  11615. vmovdqu xmm0, OWORD PTR [rsp]
  11616. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11617. vpxor xmm4, xmm4, xmm0
  11618. ; ghash_gfmul_avx
  11619. vpclmulqdq xmm2, xmm5, xmm4, 16
  11620. vpclmulqdq xmm1, xmm5, xmm4, 1
  11621. vpclmulqdq xmm0, xmm5, xmm4, 0
  11622. vpclmulqdq xmm3, xmm5, xmm4, 17
  11623. vpxor xmm2, xmm2, xmm1
  11624. vpslldq xmm1, xmm2, 8
  11625. vpsrldq xmm2, xmm2, 8
  11626. vpxor xmm7, xmm0, xmm1
  11627. vpxor xmm4, xmm3, xmm2
  11628. ; ghash_mid
  11629. vpsrld xmm0, xmm7, 31
  11630. vpsrld xmm1, xmm4, 31
  11631. vpslld xmm7, xmm7, 1
  11632. vpslld xmm4, xmm4, 1
  11633. vpsrldq xmm2, xmm0, 12
  11634. vpslldq xmm0, xmm0, 4
  11635. vpslldq xmm1, xmm1, 4
  11636. vpor xmm4, xmm4, xmm2
  11637. vpor xmm7, xmm7, xmm0
  11638. vpor xmm4, xmm4, xmm1
  11639. ; ghash_red
  11640. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11641. vpclmulqdq xmm0, xmm7, xmm2, 16
  11642. vpshufd xmm1, xmm7, 78
  11643. vpxor xmm1, xmm1, xmm0
  11644. vpclmulqdq xmm0, xmm1, xmm2, 16
  11645. vpshufd xmm1, xmm1, 78
  11646. vpxor xmm1, xmm1, xmm0
  11647. vpxor xmm4, xmm4, xmm1
  11648. L_AES_GCM_encrypt_avx2_calc_iv_done:
  11649. ; T = Encrypt counter
  11650. vpxor xmm0, xmm0, xmm0
  11651. shl edx, 3
  11652. vmovq xmm0, rdx
  11653. vpxor xmm4, xmm4, xmm0
  11654. ; ghash_gfmul_avx
  11655. vpclmulqdq xmm2, xmm5, xmm4, 16
  11656. vpclmulqdq xmm1, xmm5, xmm4, 1
  11657. vpclmulqdq xmm0, xmm5, xmm4, 0
  11658. vpclmulqdq xmm3, xmm5, xmm4, 17
  11659. vpxor xmm2, xmm2, xmm1
  11660. vpslldq xmm1, xmm2, 8
  11661. vpsrldq xmm2, xmm2, 8
  11662. vpxor xmm7, xmm0, xmm1
  11663. vpxor xmm4, xmm3, xmm2
  11664. ; ghash_mid
  11665. vpsrld xmm0, xmm7, 31
  11666. vpsrld xmm1, xmm4, 31
  11667. vpslld xmm7, xmm7, 1
  11668. vpslld xmm4, xmm4, 1
  11669. vpsrldq xmm2, xmm0, 12
  11670. vpslldq xmm0, xmm0, 4
  11671. vpslldq xmm1, xmm1, 4
  11672. vpor xmm4, xmm4, xmm2
  11673. vpor xmm7, xmm7, xmm0
  11674. vpor xmm4, xmm4, xmm1
  11675. ; ghash_red
  11676. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11677. vpclmulqdq xmm0, xmm7, xmm2, 16
  11678. vpshufd xmm1, xmm7, 78
  11679. vpxor xmm1, xmm1, xmm0
  11680. vpclmulqdq xmm0, xmm1, xmm2, 16
  11681. vpshufd xmm1, xmm1, 78
  11682. vpxor xmm1, xmm1, xmm0
  11683. vpxor xmm4, xmm4, xmm1
  11684. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11685. ; Encrypt counter
  11686. vmovdqu xmm15, OWORD PTR [rsi]
  11687. vpxor xmm15, xmm15, xmm4
  11688. vaesenc xmm15, xmm15, [rsi+16]
  11689. vaesenc xmm15, xmm15, [rsi+32]
  11690. vaesenc xmm15, xmm15, [rsi+48]
  11691. vaesenc xmm15, xmm15, [rsi+64]
  11692. vaesenc xmm15, xmm15, [rsi+80]
  11693. vaesenc xmm15, xmm15, [rsi+96]
  11694. vaesenc xmm15, xmm15, [rsi+112]
  11695. vaesenc xmm15, xmm15, [rsi+128]
  11696. vaesenc xmm15, xmm15, [rsi+144]
  11697. cmp r9d, 11
  11698. vmovdqu xmm0, OWORD PTR [rsi+160]
  11699. jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
  11700. vaesenc xmm15, xmm15, xmm0
  11701. vaesenc xmm15, xmm15, [rsi+176]
  11702. cmp r9d, 13
  11703. vmovdqu xmm0, OWORD PTR [rsi+192]
  11704. jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
  11705. vaesenc xmm15, xmm15, xmm0
  11706. vaesenc xmm15, xmm15, [rsi+208]
  11707. vmovdqu xmm0, OWORD PTR [rsi+224]
  11708. L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last:
  11709. vaesenclast xmm15, xmm15, xmm0
  11710. jmp L_AES_GCM_encrypt_avx2_iv_done
  11711. L_AES_GCM_encrypt_avx2_iv_12:
  11712. ; # Calculate values when IV is 12 bytes
  11713. ; Set counter based on IV
  11714. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
  11715. vmovdqu xmm5, OWORD PTR [rsi]
  11716. vpblendd xmm4, xmm4, [rax], 7
  11717. ; H = Encrypt X(=0) and T = Encrypt counter
  11718. vmovdqu xmm7, OWORD PTR [rsi+16]
  11719. vpxor xmm15, xmm4, xmm5
  11720. vaesenc xmm5, xmm5, xmm7
  11721. vaesenc xmm15, xmm15, xmm7
  11722. vmovdqu xmm0, OWORD PTR [rsi+32]
  11723. vaesenc xmm5, xmm5, xmm0
  11724. vaesenc xmm15, xmm15, xmm0
  11725. vmovdqu xmm0, OWORD PTR [rsi+48]
  11726. vaesenc xmm5, xmm5, xmm0
  11727. vaesenc xmm15, xmm15, xmm0
  11728. vmovdqu xmm0, OWORD PTR [rsi+64]
  11729. vaesenc xmm5, xmm5, xmm0
  11730. vaesenc xmm15, xmm15, xmm0
  11731. vmovdqu xmm0, OWORD PTR [rsi+80]
  11732. vaesenc xmm5, xmm5, xmm0
  11733. vaesenc xmm15, xmm15, xmm0
  11734. vmovdqu xmm0, OWORD PTR [rsi+96]
  11735. vaesenc xmm5, xmm5, xmm0
  11736. vaesenc xmm15, xmm15, xmm0
  11737. vmovdqu xmm0, OWORD PTR [rsi+112]
  11738. vaesenc xmm5, xmm5, xmm0
  11739. vaesenc xmm15, xmm15, xmm0
  11740. vmovdqu xmm0, OWORD PTR [rsi+128]
  11741. vaesenc xmm5, xmm5, xmm0
  11742. vaesenc xmm15, xmm15, xmm0
  11743. vmovdqu xmm0, OWORD PTR [rsi+144]
  11744. vaesenc xmm5, xmm5, xmm0
  11745. vaesenc xmm15, xmm15, xmm0
  11746. cmp r9d, 11
  11747. vmovdqu xmm0, OWORD PTR [rsi+160]
  11748. jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
  11749. vaesenc xmm5, xmm5, xmm0
  11750. vaesenc xmm15, xmm15, xmm0
  11751. vmovdqu xmm0, OWORD PTR [rsi+176]
  11752. vaesenc xmm5, xmm5, xmm0
  11753. vaesenc xmm15, xmm15, xmm0
  11754. cmp r9d, 13
  11755. vmovdqu xmm0, OWORD PTR [rsi+192]
  11756. jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
  11757. vaesenc xmm5, xmm5, xmm0
  11758. vaesenc xmm15, xmm15, xmm0
  11759. vmovdqu xmm0, OWORD PTR [rsi+208]
  11760. vaesenc xmm5, xmm5, xmm0
  11761. vaesenc xmm15, xmm15, xmm0
  11762. vmovdqu xmm0, OWORD PTR [rsi+224]
  11763. L_AES_GCM_encrypt_avx2_calc_iv_12_last:
  11764. vaesenclast xmm5, xmm5, xmm0
  11765. vaesenclast xmm15, xmm15, xmm0
  11766. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11767. L_AES_GCM_encrypt_avx2_iv_done:
  11768. ; Additional authentication data
  11769. mov edx, r11d
  11770. cmp edx, 0
  11771. je L_AES_GCM_encrypt_avx2_calc_aad_done
  11772. xor ecx, ecx
  11773. cmp edx, 16
  11774. jl L_AES_GCM_encrypt_avx2_calc_aad_lt16
  11775. and edx, 4294967280
  11776. L_AES_GCM_encrypt_avx2_calc_aad_16_loop:
  11777. vmovdqu xmm0, OWORD PTR [r12+rcx]
  11778. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11779. vpxor xmm6, xmm6, xmm0
  11780. ; ghash_gfmul_avx
  11781. vpclmulqdq xmm2, xmm5, xmm6, 16
  11782. vpclmulqdq xmm1, xmm5, xmm6, 1
  11783. vpclmulqdq xmm0, xmm5, xmm6, 0
  11784. vpclmulqdq xmm3, xmm5, xmm6, 17
  11785. vpxor xmm2, xmm2, xmm1
  11786. vpslldq xmm1, xmm2, 8
  11787. vpsrldq xmm2, xmm2, 8
  11788. vpxor xmm7, xmm0, xmm1
  11789. vpxor xmm6, xmm3, xmm2
  11790. ; ghash_mid
  11791. vpsrld xmm0, xmm7, 31
  11792. vpsrld xmm1, xmm6, 31
  11793. vpslld xmm7, xmm7, 1
  11794. vpslld xmm6, xmm6, 1
  11795. vpsrldq xmm2, xmm0, 12
  11796. vpslldq xmm0, xmm0, 4
  11797. vpslldq xmm1, xmm1, 4
  11798. vpor xmm6, xmm6, xmm2
  11799. vpor xmm7, xmm7, xmm0
  11800. vpor xmm6, xmm6, xmm1
  11801. ; ghash_red
  11802. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11803. vpclmulqdq xmm0, xmm7, xmm2, 16
  11804. vpshufd xmm1, xmm7, 78
  11805. vpxor xmm1, xmm1, xmm0
  11806. vpclmulqdq xmm0, xmm1, xmm2, 16
  11807. vpshufd xmm1, xmm1, 78
  11808. vpxor xmm1, xmm1, xmm0
  11809. vpxor xmm6, xmm6, xmm1
  11810. add ecx, 16
  11811. cmp ecx, edx
  11812. jl L_AES_GCM_encrypt_avx2_calc_aad_16_loop
  11813. mov edx, r11d
  11814. cmp ecx, edx
  11815. je L_AES_GCM_encrypt_avx2_calc_aad_done
  11816. L_AES_GCM_encrypt_avx2_calc_aad_lt16:
  11817. vpxor xmm0, xmm0, xmm0
  11818. xor ebx, ebx
  11819. vmovdqu OWORD PTR [rsp], xmm0
  11820. L_AES_GCM_encrypt_avx2_calc_aad_loop:
  11821. movzx r13d, BYTE PTR [r12+rcx]
  11822. mov BYTE PTR [rsp+rbx], r13b
  11823. inc ecx
  11824. inc ebx
  11825. cmp ecx, edx
  11826. jl L_AES_GCM_encrypt_avx2_calc_aad_loop
  11827. vmovdqu xmm0, OWORD PTR [rsp]
  11828. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11829. vpxor xmm6, xmm6, xmm0
  11830. ; ghash_gfmul_avx
  11831. vpclmulqdq xmm2, xmm5, xmm6, 16
  11832. vpclmulqdq xmm1, xmm5, xmm6, 1
  11833. vpclmulqdq xmm0, xmm5, xmm6, 0
  11834. vpclmulqdq xmm3, xmm5, xmm6, 17
  11835. vpxor xmm2, xmm2, xmm1
  11836. vpslldq xmm1, xmm2, 8
  11837. vpsrldq xmm2, xmm2, 8
  11838. vpxor xmm7, xmm0, xmm1
  11839. vpxor xmm6, xmm3, xmm2
  11840. ; ghash_mid
  11841. vpsrld xmm0, xmm7, 31
  11842. vpsrld xmm1, xmm6, 31
  11843. vpslld xmm7, xmm7, 1
  11844. vpslld xmm6, xmm6, 1
  11845. vpsrldq xmm2, xmm0, 12
  11846. vpslldq xmm0, xmm0, 4
  11847. vpslldq xmm1, xmm1, 4
  11848. vpor xmm6, xmm6, xmm2
  11849. vpor xmm7, xmm7, xmm0
  11850. vpor xmm6, xmm6, xmm1
  11851. ; ghash_red
  11852. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11853. vpclmulqdq xmm0, xmm7, xmm2, 16
  11854. vpshufd xmm1, xmm7, 78
  11855. vpxor xmm1, xmm1, xmm0
  11856. vpclmulqdq xmm0, xmm1, xmm2, 16
  11857. vpshufd xmm1, xmm1, 78
  11858. vpxor xmm1, xmm1, xmm0
  11859. vpxor xmm6, xmm6, xmm1
  11860. L_AES_GCM_encrypt_avx2_calc_aad_done:
  11861. ; Calculate counter and H
  11862. vpsrlq xmm1, xmm5, 63
  11863. vpsllq xmm0, xmm5, 1
  11864. vpslldq xmm1, xmm1, 8
  11865. vpor xmm0, xmm0, xmm1
  11866. vpshufd xmm5, xmm5, 255
  11867. vpsrad xmm5, xmm5, 31
  11868. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  11869. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  11870. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  11871. vpxor xmm5, xmm5, xmm0
  11872. xor ebx, ebx
  11873. cmp r10d, 128
  11874. mov r13d, r10d
  11875. jl L_AES_GCM_encrypt_avx2_done_128
  11876. and r13d, 4294967168
  11877. vmovdqu OWORD PTR [rsp+128], xmm4
  11878. vmovdqu OWORD PTR [rsp+144], xmm15
  11879. vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
  11880. ; H ^ 1 and H ^ 2
  11881. vpclmulqdq xmm9, xmm5, xmm5, 0
  11882. vpclmulqdq xmm10, xmm5, xmm5, 17
  11883. vpclmulqdq xmm8, xmm9, xmm3, 16
  11884. vpshufd xmm9, xmm9, 78
  11885. vpxor xmm9, xmm9, xmm8
  11886. vpclmulqdq xmm8, xmm9, xmm3, 16
  11887. vpshufd xmm9, xmm9, 78
  11888. vpxor xmm9, xmm9, xmm8
  11889. vpxor xmm0, xmm10, xmm9
  11890. vmovdqu OWORD PTR [rsp], xmm5
  11891. vmovdqu OWORD PTR [rsp+16], xmm0
  11892. ; H ^ 3 and H ^ 4
  11893. vpclmulqdq xmm11, xmm0, xmm5, 16
  11894. vpclmulqdq xmm10, xmm0, xmm5, 1
  11895. vpclmulqdq xmm9, xmm0, xmm5, 0
  11896. vpclmulqdq xmm12, xmm0, xmm5, 17
  11897. vpclmulqdq xmm13, xmm0, xmm0, 0
  11898. vpclmulqdq xmm14, xmm0, xmm0, 17
  11899. vpxor xmm11, xmm11, xmm10
  11900. vpslldq xmm10, xmm11, 8
  11901. vpsrldq xmm11, xmm11, 8
  11902. vpxor xmm10, xmm10, xmm9
  11903. vpclmulqdq xmm8, xmm13, xmm3, 16
  11904. vpclmulqdq xmm9, xmm10, xmm3, 16
  11905. vpshufd xmm10, xmm10, 78
  11906. vpshufd xmm13, xmm13, 78
  11907. vpxor xmm10, xmm10, xmm9
  11908. vpxor xmm13, xmm13, xmm8
  11909. vpclmulqdq xmm9, xmm10, xmm3, 16
  11910. vpclmulqdq xmm8, xmm13, xmm3, 16
  11911. vpshufd xmm10, xmm10, 78
  11912. vpshufd xmm13, xmm13, 78
  11913. vpxor xmm12, xmm12, xmm11
  11914. vpxor xmm13, xmm13, xmm8
  11915. vpxor xmm10, xmm10, xmm12
  11916. vpxor xmm2, xmm13, xmm14
  11917. vpxor xmm1, xmm10, xmm9
  11918. vmovdqu OWORD PTR [rsp+32], xmm1
  11919. vmovdqu OWORD PTR [rsp+48], xmm2
  11920. ; H ^ 5 and H ^ 6
  11921. vpclmulqdq xmm11, xmm1, xmm0, 16
  11922. vpclmulqdq xmm10, xmm1, xmm0, 1
  11923. vpclmulqdq xmm9, xmm1, xmm0, 0
  11924. vpclmulqdq xmm12, xmm1, xmm0, 17
  11925. vpclmulqdq xmm13, xmm1, xmm1, 0
  11926. vpclmulqdq xmm14, xmm1, xmm1, 17
  11927. vpxor xmm11, xmm11, xmm10
  11928. vpslldq xmm10, xmm11, 8
  11929. vpsrldq xmm11, xmm11, 8
  11930. vpxor xmm10, xmm10, xmm9
  11931. vpclmulqdq xmm8, xmm13, xmm3, 16
  11932. vpclmulqdq xmm9, xmm10, xmm3, 16
  11933. vpshufd xmm10, xmm10, 78
  11934. vpshufd xmm13, xmm13, 78
  11935. vpxor xmm10, xmm10, xmm9
  11936. vpxor xmm13, xmm13, xmm8
  11937. vpclmulqdq xmm9, xmm10, xmm3, 16
  11938. vpclmulqdq xmm8, xmm13, xmm3, 16
  11939. vpshufd xmm10, xmm10, 78
  11940. vpshufd xmm13, xmm13, 78
  11941. vpxor xmm12, xmm12, xmm11
  11942. vpxor xmm13, xmm13, xmm8
  11943. vpxor xmm10, xmm10, xmm12
  11944. vpxor xmm0, xmm13, xmm14
  11945. vpxor xmm7, xmm10, xmm9
  11946. vmovdqu OWORD PTR [rsp+64], xmm7
  11947. vmovdqu OWORD PTR [rsp+80], xmm0
  11948. ; H ^ 7 and H ^ 8
  11949. vpclmulqdq xmm11, xmm2, xmm1, 16
  11950. vpclmulqdq xmm10, xmm2, xmm1, 1
  11951. vpclmulqdq xmm9, xmm2, xmm1, 0
  11952. vpclmulqdq xmm12, xmm2, xmm1, 17
  11953. vpclmulqdq xmm13, xmm2, xmm2, 0
  11954. vpclmulqdq xmm14, xmm2, xmm2, 17
  11955. vpxor xmm11, xmm11, xmm10
  11956. vpslldq xmm10, xmm11, 8
  11957. vpsrldq xmm11, xmm11, 8
  11958. vpxor xmm10, xmm10, xmm9
  11959. vpclmulqdq xmm8, xmm13, xmm3, 16
  11960. vpclmulqdq xmm9, xmm10, xmm3, 16
  11961. vpshufd xmm10, xmm10, 78
  11962. vpshufd xmm13, xmm13, 78
  11963. vpxor xmm10, xmm10, xmm9
  11964. vpxor xmm13, xmm13, xmm8
  11965. vpclmulqdq xmm9, xmm10, xmm3, 16
  11966. vpclmulqdq xmm8, xmm13, xmm3, 16
  11967. vpshufd xmm10, xmm10, 78
  11968. vpshufd xmm13, xmm13, 78
  11969. vpxor xmm12, xmm12, xmm11
  11970. vpxor xmm13, xmm13, xmm8
  11971. vpxor xmm10, xmm10, xmm12
  11972. vpxor xmm0, xmm13, xmm14
  11973. vpxor xmm7, xmm10, xmm9
  11974. vmovdqu OWORD PTR [rsp+96], xmm7
  11975. vmovdqu OWORD PTR [rsp+112], xmm0
  11976. ; First 128 bytes of input
  11977. ; aesenc_128
  11978. ; aesenc_ctr
  11979. vmovdqu xmm0, OWORD PTR [rsp+128]
  11980. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  11981. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  11982. vpshufb xmm8, xmm0, xmm1
  11983. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  11984. vpshufb xmm9, xmm9, xmm1
  11985. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  11986. vpshufb xmm10, xmm10, xmm1
  11987. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  11988. vpshufb xmm11, xmm11, xmm1
  11989. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  11990. vpshufb xmm12, xmm12, xmm1
  11991. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  11992. vpshufb xmm13, xmm13, xmm1
  11993. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  11994. vpshufb xmm14, xmm14, xmm1
  11995. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  11996. vpshufb xmm15, xmm15, xmm1
  11997. ; aesenc_xor
  11998. vmovdqu xmm7, OWORD PTR [rsi]
  11999. vmovdqu OWORD PTR [rsp+128], xmm0
  12000. vpxor xmm8, xmm8, xmm7
  12001. vpxor xmm9, xmm9, xmm7
  12002. vpxor xmm10, xmm10, xmm7
  12003. vpxor xmm11, xmm11, xmm7
  12004. vpxor xmm12, xmm12, xmm7
  12005. vpxor xmm13, xmm13, xmm7
  12006. vpxor xmm14, xmm14, xmm7
  12007. vpxor xmm15, xmm15, xmm7
  12008. vmovdqu xmm7, OWORD PTR [rsi+16]
  12009. vaesenc xmm8, xmm8, xmm7
  12010. vaesenc xmm9, xmm9, xmm7
  12011. vaesenc xmm10, xmm10, xmm7
  12012. vaesenc xmm11, xmm11, xmm7
  12013. vaesenc xmm12, xmm12, xmm7
  12014. vaesenc xmm13, xmm13, xmm7
  12015. vaesenc xmm14, xmm14, xmm7
  12016. vaesenc xmm15, xmm15, xmm7
  12017. vmovdqu xmm7, OWORD PTR [rsi+32]
  12018. vaesenc xmm8, xmm8, xmm7
  12019. vaesenc xmm9, xmm9, xmm7
  12020. vaesenc xmm10, xmm10, xmm7
  12021. vaesenc xmm11, xmm11, xmm7
  12022. vaesenc xmm12, xmm12, xmm7
  12023. vaesenc xmm13, xmm13, xmm7
  12024. vaesenc xmm14, xmm14, xmm7
  12025. vaesenc xmm15, xmm15, xmm7
  12026. vmovdqu xmm7, OWORD PTR [rsi+48]
  12027. vaesenc xmm8, xmm8, xmm7
  12028. vaesenc xmm9, xmm9, xmm7
  12029. vaesenc xmm10, xmm10, xmm7
  12030. vaesenc xmm11, xmm11, xmm7
  12031. vaesenc xmm12, xmm12, xmm7
  12032. vaesenc xmm13, xmm13, xmm7
  12033. vaesenc xmm14, xmm14, xmm7
  12034. vaesenc xmm15, xmm15, xmm7
  12035. vmovdqu xmm7, OWORD PTR [rsi+64]
  12036. vaesenc xmm8, xmm8, xmm7
  12037. vaesenc xmm9, xmm9, xmm7
  12038. vaesenc xmm10, xmm10, xmm7
  12039. vaesenc xmm11, xmm11, xmm7
  12040. vaesenc xmm12, xmm12, xmm7
  12041. vaesenc xmm13, xmm13, xmm7
  12042. vaesenc xmm14, xmm14, xmm7
  12043. vaesenc xmm15, xmm15, xmm7
  12044. vmovdqu xmm7, OWORD PTR [rsi+80]
  12045. vaesenc xmm8, xmm8, xmm7
  12046. vaesenc xmm9, xmm9, xmm7
  12047. vaesenc xmm10, xmm10, xmm7
  12048. vaesenc xmm11, xmm11, xmm7
  12049. vaesenc xmm12, xmm12, xmm7
  12050. vaesenc xmm13, xmm13, xmm7
  12051. vaesenc xmm14, xmm14, xmm7
  12052. vaesenc xmm15, xmm15, xmm7
  12053. vmovdqu xmm7, OWORD PTR [rsi+96]
  12054. vaesenc xmm8, xmm8, xmm7
  12055. vaesenc xmm9, xmm9, xmm7
  12056. vaesenc xmm10, xmm10, xmm7
  12057. vaesenc xmm11, xmm11, xmm7
  12058. vaesenc xmm12, xmm12, xmm7
  12059. vaesenc xmm13, xmm13, xmm7
  12060. vaesenc xmm14, xmm14, xmm7
  12061. vaesenc xmm15, xmm15, xmm7
  12062. vmovdqu xmm7, OWORD PTR [rsi+112]
  12063. vaesenc xmm8, xmm8, xmm7
  12064. vaesenc xmm9, xmm9, xmm7
  12065. vaesenc xmm10, xmm10, xmm7
  12066. vaesenc xmm11, xmm11, xmm7
  12067. vaesenc xmm12, xmm12, xmm7
  12068. vaesenc xmm13, xmm13, xmm7
  12069. vaesenc xmm14, xmm14, xmm7
  12070. vaesenc xmm15, xmm15, xmm7
  12071. vmovdqu xmm7, OWORD PTR [rsi+128]
  12072. vaesenc xmm8, xmm8, xmm7
  12073. vaesenc xmm9, xmm9, xmm7
  12074. vaesenc xmm10, xmm10, xmm7
  12075. vaesenc xmm11, xmm11, xmm7
  12076. vaesenc xmm12, xmm12, xmm7
  12077. vaesenc xmm13, xmm13, xmm7
  12078. vaesenc xmm14, xmm14, xmm7
  12079. vaesenc xmm15, xmm15, xmm7
  12080. vmovdqu xmm7, OWORD PTR [rsi+144]
  12081. vaesenc xmm8, xmm8, xmm7
  12082. vaesenc xmm9, xmm9, xmm7
  12083. vaesenc xmm10, xmm10, xmm7
  12084. vaesenc xmm11, xmm11, xmm7
  12085. vaesenc xmm12, xmm12, xmm7
  12086. vaesenc xmm13, xmm13, xmm7
  12087. vaesenc xmm14, xmm14, xmm7
  12088. vaesenc xmm15, xmm15, xmm7
  12089. cmp r9d, 11
  12090. vmovdqu xmm7, OWORD PTR [rsi+160]
  12091. jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
  12092. vaesenc xmm8, xmm8, xmm7
  12093. vaesenc xmm9, xmm9, xmm7
  12094. vaesenc xmm10, xmm10, xmm7
  12095. vaesenc xmm11, xmm11, xmm7
  12096. vaesenc xmm12, xmm12, xmm7
  12097. vaesenc xmm13, xmm13, xmm7
  12098. vaesenc xmm14, xmm14, xmm7
  12099. vaesenc xmm15, xmm15, xmm7
  12100. vmovdqu xmm7, OWORD PTR [rsi+176]
  12101. vaesenc xmm8, xmm8, xmm7
  12102. vaesenc xmm9, xmm9, xmm7
  12103. vaesenc xmm10, xmm10, xmm7
  12104. vaesenc xmm11, xmm11, xmm7
  12105. vaesenc xmm12, xmm12, xmm7
  12106. vaesenc xmm13, xmm13, xmm7
  12107. vaesenc xmm14, xmm14, xmm7
  12108. vaesenc xmm15, xmm15, xmm7
  12109. cmp r9d, 13
  12110. vmovdqu xmm7, OWORD PTR [rsi+192]
  12111. jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
  12112. vaesenc xmm8, xmm8, xmm7
  12113. vaesenc xmm9, xmm9, xmm7
  12114. vaesenc xmm10, xmm10, xmm7
  12115. vaesenc xmm11, xmm11, xmm7
  12116. vaesenc xmm12, xmm12, xmm7
  12117. vaesenc xmm13, xmm13, xmm7
  12118. vaesenc xmm14, xmm14, xmm7
  12119. vaesenc xmm15, xmm15, xmm7
  12120. vmovdqu xmm7, OWORD PTR [rsi+208]
  12121. vaesenc xmm8, xmm8, xmm7
  12122. vaesenc xmm9, xmm9, xmm7
  12123. vaesenc xmm10, xmm10, xmm7
  12124. vaesenc xmm11, xmm11, xmm7
  12125. vaesenc xmm12, xmm12, xmm7
  12126. vaesenc xmm13, xmm13, xmm7
  12127. vaesenc xmm14, xmm14, xmm7
  12128. vaesenc xmm15, xmm15, xmm7
  12129. vmovdqu xmm7, OWORD PTR [rsi+224]
  12130. L_AES_GCM_encrypt_avx2_aesenc_128_enc_done:
  12131. ; aesenc_last
  12132. vaesenclast xmm8, xmm8, xmm7
  12133. vaesenclast xmm9, xmm9, xmm7
  12134. vaesenclast xmm10, xmm10, xmm7
  12135. vaesenclast xmm11, xmm11, xmm7
  12136. vmovdqu xmm0, OWORD PTR [rdi]
  12137. vmovdqu xmm1, OWORD PTR [rdi+16]
  12138. vmovdqu xmm2, OWORD PTR [rdi+32]
  12139. vmovdqu xmm3, OWORD PTR [rdi+48]
  12140. vpxor xmm8, xmm8, xmm0
  12141. vpxor xmm9, xmm9, xmm1
  12142. vpxor xmm10, xmm10, xmm2
  12143. vpxor xmm11, xmm11, xmm3
  12144. vmovdqu OWORD PTR [r8], xmm8
  12145. vmovdqu OWORD PTR [r8+16], xmm9
  12146. vmovdqu OWORD PTR [r8+32], xmm10
  12147. vmovdqu OWORD PTR [r8+48], xmm11
  12148. vaesenclast xmm12, xmm12, xmm7
  12149. vaesenclast xmm13, xmm13, xmm7
  12150. vaesenclast xmm14, xmm14, xmm7
  12151. vaesenclast xmm15, xmm15, xmm7
  12152. vmovdqu xmm0, OWORD PTR [rdi+64]
  12153. vmovdqu xmm1, OWORD PTR [rdi+80]
  12154. vmovdqu xmm2, OWORD PTR [rdi+96]
  12155. vmovdqu xmm3, OWORD PTR [rdi+112]
  12156. vpxor xmm12, xmm12, xmm0
  12157. vpxor xmm13, xmm13, xmm1
  12158. vpxor xmm14, xmm14, xmm2
  12159. vpxor xmm15, xmm15, xmm3
  12160. vmovdqu OWORD PTR [r8+64], xmm12
  12161. vmovdqu OWORD PTR [r8+80], xmm13
  12162. vmovdqu OWORD PTR [r8+96], xmm14
  12163. vmovdqu OWORD PTR [r8+112], xmm15
  12164. cmp r13d, 128
  12165. mov ebx, 128
  12166. jle L_AES_GCM_encrypt_avx2_end_128
  12167. ; More 128 bytes of input
  12168. L_AES_GCM_encrypt_avx2_ghash_128:
  12169. ; aesenc_128_ghash
  12170. lea rcx, QWORD PTR [rdi+rbx]
  12171. lea rdx, QWORD PTR [r8+rbx]
  12172. ; aesenc_ctr
  12173. vmovdqu xmm0, OWORD PTR [rsp+128]
  12174. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12175. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  12176. vpshufb xmm8, xmm0, xmm1
  12177. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  12178. vpshufb xmm9, xmm9, xmm1
  12179. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  12180. vpshufb xmm10, xmm10, xmm1
  12181. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  12182. vpshufb xmm11, xmm11, xmm1
  12183. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  12184. vpshufb xmm12, xmm12, xmm1
  12185. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  12186. vpshufb xmm13, xmm13, xmm1
  12187. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  12188. vpshufb xmm14, xmm14, xmm1
  12189. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  12190. vpshufb xmm15, xmm15, xmm1
  12191. ; aesenc_xor
  12192. vmovdqu xmm7, OWORD PTR [rsi]
  12193. vmovdqu OWORD PTR [rsp+128], xmm0
  12194. vpxor xmm8, xmm8, xmm7
  12195. vpxor xmm9, xmm9, xmm7
  12196. vpxor xmm10, xmm10, xmm7
  12197. vpxor xmm11, xmm11, xmm7
  12198. vpxor xmm12, xmm12, xmm7
  12199. vpxor xmm13, xmm13, xmm7
  12200. vpxor xmm14, xmm14, xmm7
  12201. vpxor xmm15, xmm15, xmm7
  12202. ; aesenc_pclmul_1
  12203. vmovdqu xmm1, OWORD PTR [rdx+-128]
  12204. vmovdqu xmm0, OWORD PTR [rsi+16]
  12205. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12206. vmovdqu xmm2, OWORD PTR [rsp+112]
  12207. vpxor xmm1, xmm1, xmm6
  12208. vpclmulqdq xmm5, xmm1, xmm2, 16
  12209. vpclmulqdq xmm3, xmm1, xmm2, 1
  12210. vpclmulqdq xmm6, xmm1, xmm2, 0
  12211. vpclmulqdq xmm7, xmm1, xmm2, 17
  12212. vaesenc xmm8, xmm8, xmm0
  12213. vaesenc xmm9, xmm9, xmm0
  12214. vaesenc xmm10, xmm10, xmm0
  12215. vaesenc xmm11, xmm11, xmm0
  12216. vaesenc xmm12, xmm12, xmm0
  12217. vaesenc xmm13, xmm13, xmm0
  12218. vaesenc xmm14, xmm14, xmm0
  12219. vaesenc xmm15, xmm15, xmm0
  12220. ; aesenc_pclmul_2
  12221. vmovdqu xmm1, OWORD PTR [rdx+-112]
  12222. vmovdqu xmm0, OWORD PTR [rsp+96]
  12223. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12224. vpxor xmm5, xmm5, xmm3
  12225. vpclmulqdq xmm2, xmm1, xmm0, 16
  12226. vpclmulqdq xmm3, xmm1, xmm0, 1
  12227. vpclmulqdq xmm4, xmm1, xmm0, 0
  12228. vpclmulqdq xmm1, xmm1, xmm0, 17
  12229. vmovdqu xmm0, OWORD PTR [rsi+32]
  12230. vpxor xmm7, xmm7, xmm1
  12231. vaesenc xmm8, xmm8, xmm0
  12232. vaesenc xmm9, xmm9, xmm0
  12233. vaesenc xmm10, xmm10, xmm0
  12234. vaesenc xmm11, xmm11, xmm0
  12235. vaesenc xmm12, xmm12, xmm0
  12236. vaesenc xmm13, xmm13, xmm0
  12237. vaesenc xmm14, xmm14, xmm0
  12238. vaesenc xmm15, xmm15, xmm0
  12239. ; aesenc_pclmul_n
  12240. vmovdqu xmm1, OWORD PTR [rdx+-96]
  12241. vmovdqu xmm0, OWORD PTR [rsp+80]
  12242. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12243. vpxor xmm5, xmm5, xmm2
  12244. vpclmulqdq xmm2, xmm1, xmm0, 16
  12245. vpxor xmm5, xmm5, xmm3
  12246. vpclmulqdq xmm3, xmm1, xmm0, 1
  12247. vpxor xmm6, xmm6, xmm4
  12248. vpclmulqdq xmm4, xmm1, xmm0, 0
  12249. vpclmulqdq xmm1, xmm1, xmm0, 17
  12250. vmovdqu xmm0, OWORD PTR [rsi+48]
  12251. vpxor xmm7, xmm7, xmm1
  12252. vaesenc xmm8, xmm8, xmm0
  12253. vaesenc xmm9, xmm9, xmm0
  12254. vaesenc xmm10, xmm10, xmm0
  12255. vaesenc xmm11, xmm11, xmm0
  12256. vaesenc xmm12, xmm12, xmm0
  12257. vaesenc xmm13, xmm13, xmm0
  12258. vaesenc xmm14, xmm14, xmm0
  12259. vaesenc xmm15, xmm15, xmm0
  12260. ; aesenc_pclmul_n
  12261. vmovdqu xmm1, OWORD PTR [rdx+-80]
  12262. vmovdqu xmm0, OWORD PTR [rsp+64]
  12263. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12264. vpxor xmm5, xmm5, xmm2
  12265. vpclmulqdq xmm2, xmm1, xmm0, 16
  12266. vpxor xmm5, xmm5, xmm3
  12267. vpclmulqdq xmm3, xmm1, xmm0, 1
  12268. vpxor xmm6, xmm6, xmm4
  12269. vpclmulqdq xmm4, xmm1, xmm0, 0
  12270. vpclmulqdq xmm1, xmm1, xmm0, 17
  12271. vmovdqu xmm0, OWORD PTR [rsi+64]
  12272. vpxor xmm7, xmm7, xmm1
  12273. vaesenc xmm8, xmm8, xmm0
  12274. vaesenc xmm9, xmm9, xmm0
  12275. vaesenc xmm10, xmm10, xmm0
  12276. vaesenc xmm11, xmm11, xmm0
  12277. vaesenc xmm12, xmm12, xmm0
  12278. vaesenc xmm13, xmm13, xmm0
  12279. vaesenc xmm14, xmm14, xmm0
  12280. vaesenc xmm15, xmm15, xmm0
  12281. ; aesenc_pclmul_n
  12282. vmovdqu xmm1, OWORD PTR [rdx+-64]
  12283. vmovdqu xmm0, OWORD PTR [rsp+48]
  12284. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12285. vpxor xmm5, xmm5, xmm2
  12286. vpclmulqdq xmm2, xmm1, xmm0, 16
  12287. vpxor xmm5, xmm5, xmm3
  12288. vpclmulqdq xmm3, xmm1, xmm0, 1
  12289. vpxor xmm6, xmm6, xmm4
  12290. vpclmulqdq xmm4, xmm1, xmm0, 0
  12291. vpclmulqdq xmm1, xmm1, xmm0, 17
  12292. vmovdqu xmm0, OWORD PTR [rsi+80]
  12293. vpxor xmm7, xmm7, xmm1
  12294. vaesenc xmm8, xmm8, xmm0
  12295. vaesenc xmm9, xmm9, xmm0
  12296. vaesenc xmm10, xmm10, xmm0
  12297. vaesenc xmm11, xmm11, xmm0
  12298. vaesenc xmm12, xmm12, xmm0
  12299. vaesenc xmm13, xmm13, xmm0
  12300. vaesenc xmm14, xmm14, xmm0
  12301. vaesenc xmm15, xmm15, xmm0
  12302. ; aesenc_pclmul_n
  12303. vmovdqu xmm1, OWORD PTR [rdx+-48]
  12304. vmovdqu xmm0, OWORD PTR [rsp+32]
  12305. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12306. vpxor xmm5, xmm5, xmm2
  12307. vpclmulqdq xmm2, xmm1, xmm0, 16
  12308. vpxor xmm5, xmm5, xmm3
  12309. vpclmulqdq xmm3, xmm1, xmm0, 1
  12310. vpxor xmm6, xmm6, xmm4
  12311. vpclmulqdq xmm4, xmm1, xmm0, 0
  12312. vpclmulqdq xmm1, xmm1, xmm0, 17
  12313. vmovdqu xmm0, OWORD PTR [rsi+96]
  12314. vpxor xmm7, xmm7, xmm1
  12315. vaesenc xmm8, xmm8, xmm0
  12316. vaesenc xmm9, xmm9, xmm0
  12317. vaesenc xmm10, xmm10, xmm0
  12318. vaesenc xmm11, xmm11, xmm0
  12319. vaesenc xmm12, xmm12, xmm0
  12320. vaesenc xmm13, xmm13, xmm0
  12321. vaesenc xmm14, xmm14, xmm0
  12322. vaesenc xmm15, xmm15, xmm0
  12323. ; aesenc_pclmul_n
  12324. vmovdqu xmm1, OWORD PTR [rdx+-32]
  12325. vmovdqu xmm0, OWORD PTR [rsp+16]
  12326. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12327. vpxor xmm5, xmm5, xmm2
  12328. vpclmulqdq xmm2, xmm1, xmm0, 16
  12329. vpxor xmm5, xmm5, xmm3
  12330. vpclmulqdq xmm3, xmm1, xmm0, 1
  12331. vpxor xmm6, xmm6, xmm4
  12332. vpclmulqdq xmm4, xmm1, xmm0, 0
  12333. vpclmulqdq xmm1, xmm1, xmm0, 17
  12334. vmovdqu xmm0, OWORD PTR [rsi+112]
  12335. vpxor xmm7, xmm7, xmm1
  12336. vaesenc xmm8, xmm8, xmm0
  12337. vaesenc xmm9, xmm9, xmm0
  12338. vaesenc xmm10, xmm10, xmm0
  12339. vaesenc xmm11, xmm11, xmm0
  12340. vaesenc xmm12, xmm12, xmm0
  12341. vaesenc xmm13, xmm13, xmm0
  12342. vaesenc xmm14, xmm14, xmm0
  12343. vaesenc xmm15, xmm15, xmm0
  12344. ; aesenc_pclmul_n
  12345. vmovdqu xmm1, OWORD PTR [rdx+-16]
  12346. vmovdqu xmm0, OWORD PTR [rsp]
  12347. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12348. vpxor xmm5, xmm5, xmm2
  12349. vpclmulqdq xmm2, xmm1, xmm0, 16
  12350. vpxor xmm5, xmm5, xmm3
  12351. vpclmulqdq xmm3, xmm1, xmm0, 1
  12352. vpxor xmm6, xmm6, xmm4
  12353. vpclmulqdq xmm4, xmm1, xmm0, 0
  12354. vpclmulqdq xmm1, xmm1, xmm0, 17
  12355. vmovdqu xmm0, OWORD PTR [rsi+128]
  12356. vpxor xmm7, xmm7, xmm1
  12357. vaesenc xmm8, xmm8, xmm0
  12358. vaesenc xmm9, xmm9, xmm0
  12359. vaesenc xmm10, xmm10, xmm0
  12360. vaesenc xmm11, xmm11, xmm0
  12361. vaesenc xmm12, xmm12, xmm0
  12362. vaesenc xmm13, xmm13, xmm0
  12363. vaesenc xmm14, xmm14, xmm0
  12364. vaesenc xmm15, xmm15, xmm0
  12365. ; aesenc_pclmul_l
  12366. vpxor xmm5, xmm5, xmm2
  12367. vpxor xmm6, xmm6, xmm4
  12368. vpxor xmm5, xmm5, xmm3
  12369. vpslldq xmm1, xmm5, 8
  12370. vpsrldq xmm5, xmm5, 8
  12371. vmovdqu xmm4, OWORD PTR [rsi+144]
  12372. vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
  12373. vaesenc xmm8, xmm8, xmm4
  12374. vpxor xmm6, xmm6, xmm1
  12375. vpxor xmm7, xmm7, xmm5
  12376. vpclmulqdq xmm3, xmm6, xmm0, 16
  12377. vaesenc xmm9, xmm9, xmm4
  12378. vaesenc xmm10, xmm10, xmm4
  12379. vaesenc xmm11, xmm11, xmm4
  12380. vpshufd xmm6, xmm6, 78
  12381. vpxor xmm6, xmm6, xmm3
  12382. vpclmulqdq xmm3, xmm6, xmm0, 16
  12383. vaesenc xmm12, xmm12, xmm4
  12384. vaesenc xmm13, xmm13, xmm4
  12385. vaesenc xmm14, xmm14, xmm4
  12386. vpshufd xmm6, xmm6, 78
  12387. vpxor xmm6, xmm6, xmm3
  12388. vpxor xmm6, xmm6, xmm7
  12389. vaesenc xmm15, xmm15, xmm4
  12390. cmp r9d, 11
  12391. vmovdqu xmm7, OWORD PTR [rsi+160]
  12392. jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
  12393. vaesenc xmm8, xmm8, xmm7
  12394. vaesenc xmm9, xmm9, xmm7
  12395. vaesenc xmm10, xmm10, xmm7
  12396. vaesenc xmm11, xmm11, xmm7
  12397. vaesenc xmm12, xmm12, xmm7
  12398. vaesenc xmm13, xmm13, xmm7
  12399. vaesenc xmm14, xmm14, xmm7
  12400. vaesenc xmm15, xmm15, xmm7
  12401. vmovdqu xmm7, OWORD PTR [rsi+176]
  12402. vaesenc xmm8, xmm8, xmm7
  12403. vaesenc xmm9, xmm9, xmm7
  12404. vaesenc xmm10, xmm10, xmm7
  12405. vaesenc xmm11, xmm11, xmm7
  12406. vaesenc xmm12, xmm12, xmm7
  12407. vaesenc xmm13, xmm13, xmm7
  12408. vaesenc xmm14, xmm14, xmm7
  12409. vaesenc xmm15, xmm15, xmm7
  12410. cmp r9d, 13
  12411. vmovdqu xmm7, OWORD PTR [rsi+192]
  12412. jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
  12413. vaesenc xmm8, xmm8, xmm7
  12414. vaesenc xmm9, xmm9, xmm7
  12415. vaesenc xmm10, xmm10, xmm7
  12416. vaesenc xmm11, xmm11, xmm7
  12417. vaesenc xmm12, xmm12, xmm7
  12418. vaesenc xmm13, xmm13, xmm7
  12419. vaesenc xmm14, xmm14, xmm7
  12420. vaesenc xmm15, xmm15, xmm7
  12421. vmovdqu xmm7, OWORD PTR [rsi+208]
  12422. vaesenc xmm8, xmm8, xmm7
  12423. vaesenc xmm9, xmm9, xmm7
  12424. vaesenc xmm10, xmm10, xmm7
  12425. vaesenc xmm11, xmm11, xmm7
  12426. vaesenc xmm12, xmm12, xmm7
  12427. vaesenc xmm13, xmm13, xmm7
  12428. vaesenc xmm14, xmm14, xmm7
  12429. vaesenc xmm15, xmm15, xmm7
  12430. vmovdqu xmm7, OWORD PTR [rsi+224]
  12431. L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done:
  12432. ; aesenc_last
  12433. vaesenclast xmm8, xmm8, xmm7
  12434. vaesenclast xmm9, xmm9, xmm7
  12435. vaesenclast xmm10, xmm10, xmm7
  12436. vaesenclast xmm11, xmm11, xmm7
  12437. vmovdqu xmm0, OWORD PTR [rcx]
  12438. vmovdqu xmm1, OWORD PTR [rcx+16]
  12439. vmovdqu xmm2, OWORD PTR [rcx+32]
  12440. vmovdqu xmm3, OWORD PTR [rcx+48]
  12441. vpxor xmm8, xmm8, xmm0
  12442. vpxor xmm9, xmm9, xmm1
  12443. vpxor xmm10, xmm10, xmm2
  12444. vpxor xmm11, xmm11, xmm3
  12445. vmovdqu OWORD PTR [rdx], xmm8
  12446. vmovdqu OWORD PTR [rdx+16], xmm9
  12447. vmovdqu OWORD PTR [rdx+32], xmm10
  12448. vmovdqu OWORD PTR [rdx+48], xmm11
  12449. vaesenclast xmm12, xmm12, xmm7
  12450. vaesenclast xmm13, xmm13, xmm7
  12451. vaesenclast xmm14, xmm14, xmm7
  12452. vaesenclast xmm15, xmm15, xmm7
  12453. vmovdqu xmm0, OWORD PTR [rcx+64]
  12454. vmovdqu xmm1, OWORD PTR [rcx+80]
  12455. vmovdqu xmm2, OWORD PTR [rcx+96]
  12456. vmovdqu xmm3, OWORD PTR [rcx+112]
  12457. vpxor xmm12, xmm12, xmm0
  12458. vpxor xmm13, xmm13, xmm1
  12459. vpxor xmm14, xmm14, xmm2
  12460. vpxor xmm15, xmm15, xmm3
  12461. vmovdqu OWORD PTR [rdx+64], xmm12
  12462. vmovdqu OWORD PTR [rdx+80], xmm13
  12463. vmovdqu OWORD PTR [rdx+96], xmm14
  12464. vmovdqu OWORD PTR [rdx+112], xmm15
  12465. ; aesenc_128_ghash - end
  12466. add ebx, 128
  12467. cmp ebx, r13d
  12468. jl L_AES_GCM_encrypt_avx2_ghash_128
  12469. L_AES_GCM_encrypt_avx2_end_128:
  12470. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12471. vpshufb xmm8, xmm8, xmm4
  12472. vpshufb xmm9, xmm9, xmm4
  12473. vpshufb xmm10, xmm10, xmm4
  12474. vpshufb xmm11, xmm11, xmm4
  12475. vpshufb xmm12, xmm12, xmm4
  12476. vpshufb xmm13, xmm13, xmm4
  12477. vpshufb xmm14, xmm14, xmm4
  12478. vpshufb xmm15, xmm15, xmm4
  12479. vpxor xmm8, xmm8, xmm6
  12480. vmovdqu xmm7, OWORD PTR [rsp]
  12481. vpclmulqdq xmm5, xmm7, xmm15, 16
  12482. vpclmulqdq xmm1, xmm7, xmm15, 1
  12483. vpclmulqdq xmm4, xmm7, xmm15, 0
  12484. vpclmulqdq xmm6, xmm7, xmm15, 17
  12485. vpxor xmm5, xmm5, xmm1
  12486. vmovdqu xmm7, OWORD PTR [rsp+16]
  12487. vpclmulqdq xmm2, xmm7, xmm14, 16
  12488. vpclmulqdq xmm1, xmm7, xmm14, 1
  12489. vpclmulqdq xmm0, xmm7, xmm14, 0
  12490. vpclmulqdq xmm3, xmm7, xmm14, 17
  12491. vpxor xmm2, xmm2, xmm1
  12492. vpxor xmm6, xmm6, xmm3
  12493. vpxor xmm5, xmm5, xmm2
  12494. vpxor xmm4, xmm4, xmm0
  12495. vmovdqu xmm15, OWORD PTR [rsp+32]
  12496. vmovdqu xmm7, OWORD PTR [rsp+48]
  12497. vpclmulqdq xmm2, xmm15, xmm13, 16
  12498. vpclmulqdq xmm1, xmm15, xmm13, 1
  12499. vpclmulqdq xmm0, xmm15, xmm13, 0
  12500. vpclmulqdq xmm3, xmm15, xmm13, 17
  12501. vpxor xmm2, xmm2, xmm1
  12502. vpxor xmm6, xmm6, xmm3
  12503. vpxor xmm5, xmm5, xmm2
  12504. vpxor xmm4, xmm4, xmm0
  12505. vpclmulqdq xmm2, xmm7, xmm12, 16
  12506. vpclmulqdq xmm1, xmm7, xmm12, 1
  12507. vpclmulqdq xmm0, xmm7, xmm12, 0
  12508. vpclmulqdq xmm3, xmm7, xmm12, 17
  12509. vpxor xmm2, xmm2, xmm1
  12510. vpxor xmm6, xmm6, xmm3
  12511. vpxor xmm5, xmm5, xmm2
  12512. vpxor xmm4, xmm4, xmm0
  12513. vmovdqu xmm15, OWORD PTR [rsp+64]
  12514. vmovdqu xmm7, OWORD PTR [rsp+80]
  12515. vpclmulqdq xmm2, xmm15, xmm11, 16
  12516. vpclmulqdq xmm1, xmm15, xmm11, 1
  12517. vpclmulqdq xmm0, xmm15, xmm11, 0
  12518. vpclmulqdq xmm3, xmm15, xmm11, 17
  12519. vpxor xmm2, xmm2, xmm1
  12520. vpxor xmm6, xmm6, xmm3
  12521. vpxor xmm5, xmm5, xmm2
  12522. vpxor xmm4, xmm4, xmm0
  12523. vpclmulqdq xmm2, xmm7, xmm10, 16
  12524. vpclmulqdq xmm1, xmm7, xmm10, 1
  12525. vpclmulqdq xmm0, xmm7, xmm10, 0
  12526. vpclmulqdq xmm3, xmm7, xmm10, 17
  12527. vpxor xmm2, xmm2, xmm1
  12528. vpxor xmm6, xmm6, xmm3
  12529. vpxor xmm5, xmm5, xmm2
  12530. vpxor xmm4, xmm4, xmm0
  12531. vmovdqu xmm15, OWORD PTR [rsp+96]
  12532. vmovdqu xmm7, OWORD PTR [rsp+112]
  12533. vpclmulqdq xmm2, xmm15, xmm9, 16
  12534. vpclmulqdq xmm1, xmm15, xmm9, 1
  12535. vpclmulqdq xmm0, xmm15, xmm9, 0
  12536. vpclmulqdq xmm3, xmm15, xmm9, 17
  12537. vpxor xmm2, xmm2, xmm1
  12538. vpxor xmm6, xmm6, xmm3
  12539. vpxor xmm5, xmm5, xmm2
  12540. vpxor xmm4, xmm4, xmm0
  12541. vpclmulqdq xmm2, xmm7, xmm8, 16
  12542. vpclmulqdq xmm1, xmm7, xmm8, 1
  12543. vpclmulqdq xmm0, xmm7, xmm8, 0
  12544. vpclmulqdq xmm3, xmm7, xmm8, 17
  12545. vpxor xmm2, xmm2, xmm1
  12546. vpxor xmm6, xmm6, xmm3
  12547. vpxor xmm5, xmm5, xmm2
  12548. vpxor xmm4, xmm4, xmm0
  12549. vpslldq xmm7, xmm5, 8
  12550. vpsrldq xmm5, xmm5, 8
  12551. vpxor xmm4, xmm4, xmm7
  12552. vpxor xmm6, xmm6, xmm5
  12553. ; ghash_red
  12554. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12555. vpclmulqdq xmm0, xmm4, xmm2, 16
  12556. vpshufd xmm1, xmm4, 78
  12557. vpxor xmm1, xmm1, xmm0
  12558. vpclmulqdq xmm0, xmm1, xmm2, 16
  12559. vpshufd xmm1, xmm1, 78
  12560. vpxor xmm1, xmm1, xmm0
  12561. vpxor xmm6, xmm6, xmm1
  12562. vmovdqu xmm5, OWORD PTR [rsp]
  12563. vmovdqu xmm4, OWORD PTR [rsp+128]
  12564. vmovdqu xmm15, OWORD PTR [rsp+144]
  12565. L_AES_GCM_encrypt_avx2_done_128:
  12566. cmp ebx, r10d
  12567. je L_AES_GCM_encrypt_avx2_done_enc
  12568. mov r13d, r10d
  12569. and r13d, 4294967280
  12570. cmp ebx, r13d
  12571. jge L_AES_GCM_encrypt_avx2_last_block_done
  12572. ; aesenc_block
  12573. vmovdqu xmm1, xmm4
  12574. vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12575. vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
  12576. vpxor xmm0, xmm0, [rsi]
  12577. vmovdqu xmm2, OWORD PTR [rsi+16]
  12578. vaesenc xmm0, xmm0, xmm2
  12579. vmovdqu xmm2, OWORD PTR [rsi+32]
  12580. vaesenc xmm0, xmm0, xmm2
  12581. vmovdqu xmm2, OWORD PTR [rsi+48]
  12582. vaesenc xmm0, xmm0, xmm2
  12583. vmovdqu xmm2, OWORD PTR [rsi+64]
  12584. vaesenc xmm0, xmm0, xmm2
  12585. vmovdqu xmm2, OWORD PTR [rsi+80]
  12586. vaesenc xmm0, xmm0, xmm2
  12587. vmovdqu xmm2, OWORD PTR [rsi+96]
  12588. vaesenc xmm0, xmm0, xmm2
  12589. vmovdqu xmm2, OWORD PTR [rsi+112]
  12590. vaesenc xmm0, xmm0, xmm2
  12591. vmovdqu xmm2, OWORD PTR [rsi+128]
  12592. vaesenc xmm0, xmm0, xmm2
  12593. vmovdqu xmm2, OWORD PTR [rsi+144]
  12594. vaesenc xmm0, xmm0, xmm2
  12595. vmovdqu xmm4, xmm1
  12596. cmp r9d, 11
  12597. vmovdqu xmm1, OWORD PTR [rsi+160]
  12598. jl L_AES_GCM_encrypt_avx2_aesenc_block_last
  12599. vaesenc xmm0, xmm0, xmm1
  12600. vmovdqu xmm2, OWORD PTR [rsi+176]
  12601. vaesenc xmm0, xmm0, xmm2
  12602. cmp r9d, 13
  12603. vmovdqu xmm1, OWORD PTR [rsi+192]
  12604. jl L_AES_GCM_encrypt_avx2_aesenc_block_last
  12605. vaesenc xmm0, xmm0, xmm1
  12606. vmovdqu xmm2, OWORD PTR [rsi+208]
  12607. vaesenc xmm0, xmm0, xmm2
  12608. vmovdqu xmm1, OWORD PTR [rsi+224]
  12609. L_AES_GCM_encrypt_avx2_aesenc_block_last:
  12610. vaesenclast xmm0, xmm0, xmm1
  12611. vmovdqu xmm1, OWORD PTR [rdi+rbx]
  12612. vpxor xmm0, xmm0, xmm1
  12613. vmovdqu OWORD PTR [r8+rbx], xmm0
  12614. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12615. vpxor xmm6, xmm6, xmm0
  12616. add ebx, 16
  12617. cmp ebx, r13d
  12618. jge L_AES_GCM_encrypt_avx2_last_block_ghash
  12619. L_AES_GCM_encrypt_avx2_last_block_start:
  12620. vmovdqu xmm12, OWORD PTR [rdi+rbx]
  12621. vpshufb xmm11, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12622. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  12623. ; aesenc_gfmul_sb
  12624. vpclmulqdq xmm2, xmm6, xmm5, 1
  12625. vpclmulqdq xmm3, xmm6, xmm5, 16
  12626. vpclmulqdq xmm1, xmm6, xmm5, 0
  12627. vpclmulqdq xmm8, xmm6, xmm5, 17
  12628. vpxor xmm11, xmm11, [rsi]
  12629. vaesenc xmm11, xmm11, [rsi+16]
  12630. vpxor xmm3, xmm3, xmm2
  12631. vpslldq xmm2, xmm3, 8
  12632. vpsrldq xmm3, xmm3, 8
  12633. vaesenc xmm11, xmm11, [rsi+32]
  12634. vpxor xmm2, xmm2, xmm1
  12635. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12636. vaesenc xmm11, xmm11, [rsi+48]
  12637. vaesenc xmm11, xmm11, [rsi+64]
  12638. vaesenc xmm11, xmm11, [rsi+80]
  12639. vpshufd xmm2, xmm2, 78
  12640. vpxor xmm2, xmm2, xmm1
  12641. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12642. vaesenc xmm11, xmm11, [rsi+96]
  12643. vaesenc xmm11, xmm11, [rsi+112]
  12644. vaesenc xmm11, xmm11, [rsi+128]
  12645. vpshufd xmm2, xmm2, 78
  12646. vaesenc xmm11, xmm11, [rsi+144]
  12647. vpxor xmm8, xmm8, xmm3
  12648. vpxor xmm2, xmm2, xmm8
  12649. vmovdqu xmm0, OWORD PTR [rsi+160]
  12650. cmp r9d, 11
  12651. jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
  12652. vaesenc xmm11, xmm11, xmm0
  12653. vaesenc xmm11, xmm11, [rsi+176]
  12654. vmovdqu xmm0, OWORD PTR [rsi+192]
  12655. cmp r9d, 13
  12656. jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
  12657. vaesenc xmm11, xmm11, xmm0
  12658. vaesenc xmm11, xmm11, [rsi+208]
  12659. vmovdqu xmm0, OWORD PTR [rsi+224]
  12660. L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last:
  12661. vaesenclast xmm11, xmm11, xmm0
  12662. vpxor xmm6, xmm2, xmm1
  12663. vpxor xmm11, xmm11, xmm12
  12664. vmovdqu OWORD PTR [r8+rbx], xmm11
  12665. vpshufb xmm11, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12666. vpxor xmm6, xmm6, xmm11
  12667. add ebx, 16
  12668. cmp ebx, r13d
  12669. jl L_AES_GCM_encrypt_avx2_last_block_start
  12670. L_AES_GCM_encrypt_avx2_last_block_ghash:
  12671. ; ghash_gfmul_red
  12672. vpclmulqdq xmm10, xmm6, xmm5, 16
  12673. vpclmulqdq xmm9, xmm6, xmm5, 1
  12674. vpclmulqdq xmm8, xmm6, xmm5, 0
  12675. vpxor xmm10, xmm10, xmm9
  12676. vpslldq xmm9, xmm10, 8
  12677. vpsrldq xmm10, xmm10, 8
  12678. vpxor xmm9, xmm9, xmm8
  12679. vpclmulqdq xmm6, xmm6, xmm5, 17
  12680. vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12681. vpshufd xmm9, xmm9, 78
  12682. vpxor xmm9, xmm9, xmm8
  12683. vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12684. vpshufd xmm9, xmm9, 78
  12685. vpxor xmm6, xmm6, xmm10
  12686. vpxor xmm6, xmm6, xmm9
  12687. vpxor xmm6, xmm6, xmm8
  12688. L_AES_GCM_encrypt_avx2_last_block_done:
  12689. mov ecx, r10d
  12690. mov edx, r10d
  12691. and ecx, 15
  12692. jz L_AES_GCM_encrypt_avx2_done_enc
  12693. ; aesenc_last15_enc
  12694. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12695. vpxor xmm4, xmm4, [rsi]
  12696. vaesenc xmm4, xmm4, [rsi+16]
  12697. vaesenc xmm4, xmm4, [rsi+32]
  12698. vaesenc xmm4, xmm4, [rsi+48]
  12699. vaesenc xmm4, xmm4, [rsi+64]
  12700. vaesenc xmm4, xmm4, [rsi+80]
  12701. vaesenc xmm4, xmm4, [rsi+96]
  12702. vaesenc xmm4, xmm4, [rsi+112]
  12703. vaesenc xmm4, xmm4, [rsi+128]
  12704. vaesenc xmm4, xmm4, [rsi+144]
  12705. cmp r9d, 11
  12706. vmovdqu xmm0, OWORD PTR [rsi+160]
  12707. jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
  12708. vaesenc xmm4, xmm4, xmm0
  12709. vaesenc xmm4, xmm4, [rsi+176]
  12710. cmp r9d, 13
  12711. vmovdqu xmm0, OWORD PTR [rsi+192]
  12712. jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
  12713. vaesenc xmm4, xmm4, xmm0
  12714. vaesenc xmm4, xmm4, [rsi+208]
  12715. vmovdqu xmm0, OWORD PTR [rsi+224]
  12716. L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last:
  12717. vaesenclast xmm4, xmm4, xmm0
  12718. xor ecx, ecx
  12719. vpxor xmm0, xmm0, xmm0
  12720. vmovdqu OWORD PTR [rsp], xmm4
  12721. vmovdqu OWORD PTR [rsp+16], xmm0
  12722. L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop:
  12723. movzx r13d, BYTE PTR [rdi+rbx]
  12724. xor r13b, BYTE PTR [rsp+rcx]
  12725. mov BYTE PTR [rsp+rcx+16], r13b
  12726. mov BYTE PTR [r8+rbx], r13b
  12727. inc ebx
  12728. inc ecx
  12729. cmp ebx, edx
  12730. jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop
  12731. L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc:
  12732. vmovdqu xmm4, OWORD PTR [rsp+16]
  12733. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12734. vpxor xmm6, xmm6, xmm4
  12735. ; ghash_gfmul_red
  12736. vpclmulqdq xmm2, xmm6, xmm5, 16
  12737. vpclmulqdq xmm1, xmm6, xmm5, 1
  12738. vpclmulqdq xmm0, xmm6, xmm5, 0
  12739. vpxor xmm2, xmm2, xmm1
  12740. vpslldq xmm1, xmm2, 8
  12741. vpsrldq xmm2, xmm2, 8
  12742. vpxor xmm1, xmm1, xmm0
  12743. vpclmulqdq xmm6, xmm6, xmm5, 17
  12744. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12745. vpshufd xmm1, xmm1, 78
  12746. vpxor xmm1, xmm1, xmm0
  12747. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12748. vpshufd xmm1, xmm1, 78
  12749. vpxor xmm6, xmm6, xmm2
  12750. vpxor xmm6, xmm6, xmm1
  12751. vpxor xmm6, xmm6, xmm0
  12752. L_AES_GCM_encrypt_avx2_done_enc:
  12753. ; calc_tag
  12754. shl r10, 3
  12755. shl r11, 3
  12756. vmovq xmm0, r10
  12757. vmovq xmm1, r11
  12758. vpunpcklqdq xmm0, xmm0, xmm1
  12759. vpxor xmm0, xmm0, xmm6
  12760. ; ghash_gfmul_red
  12761. vpclmulqdq xmm4, xmm0, xmm5, 16
  12762. vpclmulqdq xmm3, xmm0, xmm5, 1
  12763. vpclmulqdq xmm2, xmm0, xmm5, 0
  12764. vpxor xmm4, xmm4, xmm3
  12765. vpslldq xmm3, xmm4, 8
  12766. vpsrldq xmm4, xmm4, 8
  12767. vpxor xmm3, xmm3, xmm2
  12768. vpclmulqdq xmm0, xmm0, xmm5, 17
  12769. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12770. vpshufd xmm3, xmm3, 78
  12771. vpxor xmm3, xmm3, xmm2
  12772. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12773. vpshufd xmm3, xmm3, 78
  12774. vpxor xmm0, xmm0, xmm4
  12775. vpxor xmm0, xmm0, xmm3
  12776. vpxor xmm0, xmm0, xmm2
  12777. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12778. vpxor xmm0, xmm0, xmm15
  12779. ; store_tag
  12780. cmp r14d, 16
  12781. je L_AES_GCM_encrypt_avx2_store_tag_16
  12782. xor rcx, rcx
  12783. vmovdqu OWORD PTR [rsp], xmm0
  12784. L_AES_GCM_encrypt_avx2_store_tag_loop:
  12785. movzx r13d, BYTE PTR [rsp+rcx]
  12786. mov BYTE PTR [r15+rcx], r13b
  12787. inc ecx
  12788. cmp ecx, r14d
  12789. jne L_AES_GCM_encrypt_avx2_store_tag_loop
  12790. jmp L_AES_GCM_encrypt_avx2_store_tag_done
  12791. L_AES_GCM_encrypt_avx2_store_tag_16:
  12792. vmovdqu OWORD PTR [r15], xmm0
  12793. L_AES_GCM_encrypt_avx2_store_tag_done:
  12794. vzeroupper
  12795. vmovdqu xmm6, OWORD PTR [rsp+160]
  12796. vmovdqu xmm7, OWORD PTR [rsp+176]
  12797. vmovdqu xmm8, OWORD PTR [rsp+192]
  12798. vmovdqu xmm9, OWORD PTR [rsp+208]
  12799. vmovdqu xmm10, OWORD PTR [rsp+224]
  12800. vmovdqu xmm11, OWORD PTR [rsp+240]
  12801. vmovdqu xmm12, OWORD PTR [rsp+256]
  12802. vmovdqu xmm13, OWORD PTR [rsp+272]
  12803. vmovdqu xmm14, OWORD PTR [rsp+288]
  12804. vmovdqu xmm15, OWORD PTR [rsp+304]
  12805. add rsp, 320
  12806. pop rsi
  12807. pop r14
  12808. pop rbx
  12809. pop r15
  12810. pop r12
  12811. pop rdi
  12812. pop r13
  12813. ret
  12814. AES_GCM_encrypt_avx2 ENDP
  12815. _text ENDS
  12816. _text SEGMENT READONLY PARA
  12817. AES_GCM_decrypt_avx2 PROC
  12818. push r13
  12819. push rdi
  12820. push r12
  12821. push r14
  12822. push rbx
  12823. push r15
  12824. push rsi
  12825. push rbp
  12826. mov rdi, rcx
  12827. mov r12, r8
  12828. mov rax, r9
  12829. mov r14, QWORD PTR [rsp+104]
  12830. mov r8, rdx
  12831. mov r10d, DWORD PTR [rsp+112]
  12832. mov r11d, DWORD PTR [rsp+120]
  12833. mov ebx, DWORD PTR [rsp+128]
  12834. mov r15d, DWORD PTR [rsp+136]
  12835. mov rsi, QWORD PTR [rsp+144]
  12836. mov r9d, DWORD PTR [rsp+152]
  12837. mov rbp, QWORD PTR [rsp+160]
  12838. sub rsp, 328
  12839. vmovdqu OWORD PTR [rsp+168], xmm6
  12840. vmovdqu OWORD PTR [rsp+184], xmm7
  12841. vmovdqu OWORD PTR [rsp+200], xmm8
  12842. vmovdqu OWORD PTR [rsp+216], xmm9
  12843. vmovdqu OWORD PTR [rsp+232], xmm10
  12844. vmovdqu OWORD PTR [rsp+248], xmm11
  12845. vmovdqu OWORD PTR [rsp+264], xmm12
  12846. vmovdqu OWORD PTR [rsp+280], xmm13
  12847. vmovdqu OWORD PTR [rsp+296], xmm14
  12848. vmovdqu OWORD PTR [rsp+312], xmm15
  12849. vpxor xmm4, xmm4, xmm4
  12850. vpxor xmm6, xmm6, xmm6
  12851. mov edx, ebx
  12852. cmp edx, 12
  12853. je L_AES_GCM_decrypt_avx2_iv_12
  12854. ; Calculate values when IV is not 12 bytes
  12855. ; H = Encrypt X(=0)
  12856. vmovdqu xmm5, OWORD PTR [rsi]
  12857. vaesenc xmm5, xmm5, [rsi+16]
  12858. vaesenc xmm5, xmm5, [rsi+32]
  12859. vaesenc xmm5, xmm5, [rsi+48]
  12860. vaesenc xmm5, xmm5, [rsi+64]
  12861. vaesenc xmm5, xmm5, [rsi+80]
  12862. vaesenc xmm5, xmm5, [rsi+96]
  12863. vaesenc xmm5, xmm5, [rsi+112]
  12864. vaesenc xmm5, xmm5, [rsi+128]
  12865. vaesenc xmm5, xmm5, [rsi+144]
  12866. cmp r9d, 11
  12867. vmovdqu xmm0, OWORD PTR [rsi+160]
  12868. jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
  12869. vaesenc xmm5, xmm5, xmm0
  12870. vaesenc xmm5, xmm5, [rsi+176]
  12871. cmp r9d, 13
  12872. vmovdqu xmm0, OWORD PTR [rsi+192]
  12873. jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
  12874. vaesenc xmm5, xmm5, xmm0
  12875. vaesenc xmm5, xmm5, [rsi+208]
  12876. vmovdqu xmm0, OWORD PTR [rsi+224]
  12877. L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last:
  12878. vaesenclast xmm5, xmm5, xmm0
  12879. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12880. ; Calc counter
  12881. ; Initialization vector
  12882. cmp edx, 0
  12883. mov rcx, 0
  12884. je L_AES_GCM_decrypt_avx2_calc_iv_done
  12885. cmp edx, 16
  12886. jl L_AES_GCM_decrypt_avx2_calc_iv_lt16
  12887. and edx, 4294967280
  12888. L_AES_GCM_decrypt_avx2_calc_iv_16_loop:
  12889. vmovdqu xmm0, OWORD PTR [rax+rcx]
  12890. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12891. vpxor xmm4, xmm4, xmm0
  12892. ; ghash_gfmul_avx
  12893. vpclmulqdq xmm2, xmm5, xmm4, 16
  12894. vpclmulqdq xmm1, xmm5, xmm4, 1
  12895. vpclmulqdq xmm0, xmm5, xmm4, 0
  12896. vpclmulqdq xmm3, xmm5, xmm4, 17
  12897. vpxor xmm2, xmm2, xmm1
  12898. vpslldq xmm1, xmm2, 8
  12899. vpsrldq xmm2, xmm2, 8
  12900. vpxor xmm7, xmm0, xmm1
  12901. vpxor xmm4, xmm3, xmm2
  12902. ; ghash_mid
  12903. vpsrld xmm0, xmm7, 31
  12904. vpsrld xmm1, xmm4, 31
  12905. vpslld xmm7, xmm7, 1
  12906. vpslld xmm4, xmm4, 1
  12907. vpsrldq xmm2, xmm0, 12
  12908. vpslldq xmm0, xmm0, 4
  12909. vpslldq xmm1, xmm1, 4
  12910. vpor xmm4, xmm4, xmm2
  12911. vpor xmm7, xmm7, xmm0
  12912. vpor xmm4, xmm4, xmm1
  12913. ; ghash_red
  12914. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12915. vpclmulqdq xmm0, xmm7, xmm2, 16
  12916. vpshufd xmm1, xmm7, 78
  12917. vpxor xmm1, xmm1, xmm0
  12918. vpclmulqdq xmm0, xmm1, xmm2, 16
  12919. vpshufd xmm1, xmm1, 78
  12920. vpxor xmm1, xmm1, xmm0
  12921. vpxor xmm4, xmm4, xmm1
  12922. add ecx, 16
  12923. cmp ecx, edx
  12924. jl L_AES_GCM_decrypt_avx2_calc_iv_16_loop
  12925. mov edx, ebx
  12926. cmp ecx, edx
  12927. je L_AES_GCM_decrypt_avx2_calc_iv_done
  12928. L_AES_GCM_decrypt_avx2_calc_iv_lt16:
  12929. vpxor xmm0, xmm0, xmm0
  12930. xor ebx, ebx
  12931. vmovdqu OWORD PTR [rsp], xmm0
  12932. L_AES_GCM_decrypt_avx2_calc_iv_loop:
  12933. movzx r13d, BYTE PTR [rax+rcx]
  12934. mov BYTE PTR [rsp+rbx], r13b
  12935. inc ecx
  12936. inc ebx
  12937. cmp ecx, edx
  12938. jl L_AES_GCM_decrypt_avx2_calc_iv_loop
  12939. vmovdqu xmm0, OWORD PTR [rsp]
  12940. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12941. vpxor xmm4, xmm4, xmm0
  12942. ; ghash_gfmul_avx
  12943. vpclmulqdq xmm2, xmm5, xmm4, 16
  12944. vpclmulqdq xmm1, xmm5, xmm4, 1
  12945. vpclmulqdq xmm0, xmm5, xmm4, 0
  12946. vpclmulqdq xmm3, xmm5, xmm4, 17
  12947. vpxor xmm2, xmm2, xmm1
  12948. vpslldq xmm1, xmm2, 8
  12949. vpsrldq xmm2, xmm2, 8
  12950. vpxor xmm7, xmm0, xmm1
  12951. vpxor xmm4, xmm3, xmm2
  12952. ; ghash_mid
  12953. vpsrld xmm0, xmm7, 31
  12954. vpsrld xmm1, xmm4, 31
  12955. vpslld xmm7, xmm7, 1
  12956. vpslld xmm4, xmm4, 1
  12957. vpsrldq xmm2, xmm0, 12
  12958. vpslldq xmm0, xmm0, 4
  12959. vpslldq xmm1, xmm1, 4
  12960. vpor xmm4, xmm4, xmm2
  12961. vpor xmm7, xmm7, xmm0
  12962. vpor xmm4, xmm4, xmm1
  12963. ; ghash_red
  12964. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12965. vpclmulqdq xmm0, xmm7, xmm2, 16
  12966. vpshufd xmm1, xmm7, 78
  12967. vpxor xmm1, xmm1, xmm0
  12968. vpclmulqdq xmm0, xmm1, xmm2, 16
  12969. vpshufd xmm1, xmm1, 78
  12970. vpxor xmm1, xmm1, xmm0
  12971. vpxor xmm4, xmm4, xmm1
  12972. L_AES_GCM_decrypt_avx2_calc_iv_done:
  12973. ; T = Encrypt counter
  12974. vpxor xmm0, xmm0, xmm0
  12975. shl edx, 3
  12976. vmovq xmm0, rdx
  12977. vpxor xmm4, xmm4, xmm0
  12978. ; ghash_gfmul_avx
  12979. vpclmulqdq xmm2, xmm5, xmm4, 16
  12980. vpclmulqdq xmm1, xmm5, xmm4, 1
  12981. vpclmulqdq xmm0, xmm5, xmm4, 0
  12982. vpclmulqdq xmm3, xmm5, xmm4, 17
  12983. vpxor xmm2, xmm2, xmm1
  12984. vpslldq xmm1, xmm2, 8
  12985. vpsrldq xmm2, xmm2, 8
  12986. vpxor xmm7, xmm0, xmm1
  12987. vpxor xmm4, xmm3, xmm2
  12988. ; ghash_mid
  12989. vpsrld xmm0, xmm7, 31
  12990. vpsrld xmm1, xmm4, 31
  12991. vpslld xmm7, xmm7, 1
  12992. vpslld xmm4, xmm4, 1
  12993. vpsrldq xmm2, xmm0, 12
  12994. vpslldq xmm0, xmm0, 4
  12995. vpslldq xmm1, xmm1, 4
  12996. vpor xmm4, xmm4, xmm2
  12997. vpor xmm7, xmm7, xmm0
  12998. vpor xmm4, xmm4, xmm1
  12999. ; ghash_red
  13000. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13001. vpclmulqdq xmm0, xmm7, xmm2, 16
  13002. vpshufd xmm1, xmm7, 78
  13003. vpxor xmm1, xmm1, xmm0
  13004. vpclmulqdq xmm0, xmm1, xmm2, 16
  13005. vpshufd xmm1, xmm1, 78
  13006. vpxor xmm1, xmm1, xmm0
  13007. vpxor xmm4, xmm4, xmm1
  13008. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13009. ; Encrypt counter
  13010. vmovdqu xmm15, OWORD PTR [rsi]
  13011. vpxor xmm15, xmm15, xmm4
  13012. vaesenc xmm15, xmm15, [rsi+16]
  13013. vaesenc xmm15, xmm15, [rsi+32]
  13014. vaesenc xmm15, xmm15, [rsi+48]
  13015. vaesenc xmm15, xmm15, [rsi+64]
  13016. vaesenc xmm15, xmm15, [rsi+80]
  13017. vaesenc xmm15, xmm15, [rsi+96]
  13018. vaesenc xmm15, xmm15, [rsi+112]
  13019. vaesenc xmm15, xmm15, [rsi+128]
  13020. vaesenc xmm15, xmm15, [rsi+144]
  13021. cmp r9d, 11
  13022. vmovdqu xmm0, OWORD PTR [rsi+160]
  13023. jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
  13024. vaesenc xmm15, xmm15, xmm0
  13025. vaesenc xmm15, xmm15, [rsi+176]
  13026. cmp r9d, 13
  13027. vmovdqu xmm0, OWORD PTR [rsi+192]
  13028. jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
  13029. vaesenc xmm15, xmm15, xmm0
  13030. vaesenc xmm15, xmm15, [rsi+208]
  13031. vmovdqu xmm0, OWORD PTR [rsi+224]
  13032. L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last:
  13033. vaesenclast xmm15, xmm15, xmm0
  13034. jmp L_AES_GCM_decrypt_avx2_iv_done
  13035. L_AES_GCM_decrypt_avx2_iv_12:
  13036. ; # Calculate values when IV is 12 bytes
  13037. ; Set counter based on IV
  13038. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
  13039. vmovdqu xmm5, OWORD PTR [rsi]
  13040. vpblendd xmm4, xmm4, [rax], 7
  13041. ; H = Encrypt X(=0) and T = Encrypt counter
  13042. vmovdqu xmm7, OWORD PTR [rsi+16]
  13043. vpxor xmm15, xmm4, xmm5
  13044. vaesenc xmm5, xmm5, xmm7
  13045. vaesenc xmm15, xmm15, xmm7
  13046. vmovdqu xmm0, OWORD PTR [rsi+32]
  13047. vaesenc xmm5, xmm5, xmm0
  13048. vaesenc xmm15, xmm15, xmm0
  13049. vmovdqu xmm0, OWORD PTR [rsi+48]
  13050. vaesenc xmm5, xmm5, xmm0
  13051. vaesenc xmm15, xmm15, xmm0
  13052. vmovdqu xmm0, OWORD PTR [rsi+64]
  13053. vaesenc xmm5, xmm5, xmm0
  13054. vaesenc xmm15, xmm15, xmm0
  13055. vmovdqu xmm0, OWORD PTR [rsi+80]
  13056. vaesenc xmm5, xmm5, xmm0
  13057. vaesenc xmm15, xmm15, xmm0
  13058. vmovdqu xmm0, OWORD PTR [rsi+96]
  13059. vaesenc xmm5, xmm5, xmm0
  13060. vaesenc xmm15, xmm15, xmm0
  13061. vmovdqu xmm0, OWORD PTR [rsi+112]
  13062. vaesenc xmm5, xmm5, xmm0
  13063. vaesenc xmm15, xmm15, xmm0
  13064. vmovdqu xmm0, OWORD PTR [rsi+128]
  13065. vaesenc xmm5, xmm5, xmm0
  13066. vaesenc xmm15, xmm15, xmm0
  13067. vmovdqu xmm0, OWORD PTR [rsi+144]
  13068. vaesenc xmm5, xmm5, xmm0
  13069. vaesenc xmm15, xmm15, xmm0
  13070. cmp r9d, 11
  13071. vmovdqu xmm0, OWORD PTR [rsi+160]
  13072. jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
  13073. vaesenc xmm5, xmm5, xmm0
  13074. vaesenc xmm15, xmm15, xmm0
  13075. vmovdqu xmm0, OWORD PTR [rsi+176]
  13076. vaesenc xmm5, xmm5, xmm0
  13077. vaesenc xmm15, xmm15, xmm0
  13078. cmp r9d, 13
  13079. vmovdqu xmm0, OWORD PTR [rsi+192]
  13080. jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
  13081. vaesenc xmm5, xmm5, xmm0
  13082. vaesenc xmm15, xmm15, xmm0
  13083. vmovdqu xmm0, OWORD PTR [rsi+208]
  13084. vaesenc xmm5, xmm5, xmm0
  13085. vaesenc xmm15, xmm15, xmm0
  13086. vmovdqu xmm0, OWORD PTR [rsi+224]
  13087. L_AES_GCM_decrypt_avx2_calc_iv_12_last:
  13088. vaesenclast xmm5, xmm5, xmm0
  13089. vaesenclast xmm15, xmm15, xmm0
  13090. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13091. L_AES_GCM_decrypt_avx2_iv_done:
  13092. ; Additional authentication data
  13093. mov edx, r11d
  13094. cmp edx, 0
  13095. je L_AES_GCM_decrypt_avx2_calc_aad_done
  13096. xor ecx, ecx
  13097. cmp edx, 16
  13098. jl L_AES_GCM_decrypt_avx2_calc_aad_lt16
  13099. and edx, 4294967280
  13100. L_AES_GCM_decrypt_avx2_calc_aad_16_loop:
  13101. vmovdqu xmm0, OWORD PTR [r12+rcx]
  13102. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13103. vpxor xmm6, xmm6, xmm0
  13104. ; ghash_gfmul_avx
  13105. vpclmulqdq xmm2, xmm5, xmm6, 16
  13106. vpclmulqdq xmm1, xmm5, xmm6, 1
  13107. vpclmulqdq xmm0, xmm5, xmm6, 0
  13108. vpclmulqdq xmm3, xmm5, xmm6, 17
  13109. vpxor xmm2, xmm2, xmm1
  13110. vpslldq xmm1, xmm2, 8
  13111. vpsrldq xmm2, xmm2, 8
  13112. vpxor xmm7, xmm0, xmm1
  13113. vpxor xmm6, xmm3, xmm2
  13114. ; ghash_mid
  13115. vpsrld xmm0, xmm7, 31
  13116. vpsrld xmm1, xmm6, 31
  13117. vpslld xmm7, xmm7, 1
  13118. vpslld xmm6, xmm6, 1
  13119. vpsrldq xmm2, xmm0, 12
  13120. vpslldq xmm0, xmm0, 4
  13121. vpslldq xmm1, xmm1, 4
  13122. vpor xmm6, xmm6, xmm2
  13123. vpor xmm7, xmm7, xmm0
  13124. vpor xmm6, xmm6, xmm1
  13125. ; ghash_red
  13126. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13127. vpclmulqdq xmm0, xmm7, xmm2, 16
  13128. vpshufd xmm1, xmm7, 78
  13129. vpxor xmm1, xmm1, xmm0
  13130. vpclmulqdq xmm0, xmm1, xmm2, 16
  13131. vpshufd xmm1, xmm1, 78
  13132. vpxor xmm1, xmm1, xmm0
  13133. vpxor xmm6, xmm6, xmm1
  13134. add ecx, 16
  13135. cmp ecx, edx
  13136. jl L_AES_GCM_decrypt_avx2_calc_aad_16_loop
  13137. mov edx, r11d
  13138. cmp ecx, edx
  13139. je L_AES_GCM_decrypt_avx2_calc_aad_done
  13140. L_AES_GCM_decrypt_avx2_calc_aad_lt16:
  13141. vpxor xmm0, xmm0, xmm0
  13142. xor ebx, ebx
  13143. vmovdqu OWORD PTR [rsp], xmm0
  13144. L_AES_GCM_decrypt_avx2_calc_aad_loop:
  13145. movzx r13d, BYTE PTR [r12+rcx]
  13146. mov BYTE PTR [rsp+rbx], r13b
  13147. inc ecx
  13148. inc ebx
  13149. cmp ecx, edx
  13150. jl L_AES_GCM_decrypt_avx2_calc_aad_loop
  13151. vmovdqu xmm0, OWORD PTR [rsp]
  13152. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13153. vpxor xmm6, xmm6, xmm0
  13154. ; ghash_gfmul_avx
  13155. vpclmulqdq xmm2, xmm5, xmm6, 16
  13156. vpclmulqdq xmm1, xmm5, xmm6, 1
  13157. vpclmulqdq xmm0, xmm5, xmm6, 0
  13158. vpclmulqdq xmm3, xmm5, xmm6, 17
  13159. vpxor xmm2, xmm2, xmm1
  13160. vpslldq xmm1, xmm2, 8
  13161. vpsrldq xmm2, xmm2, 8
  13162. vpxor xmm7, xmm0, xmm1
  13163. vpxor xmm6, xmm3, xmm2
  13164. ; ghash_mid
  13165. vpsrld xmm0, xmm7, 31
  13166. vpsrld xmm1, xmm6, 31
  13167. vpslld xmm7, xmm7, 1
  13168. vpslld xmm6, xmm6, 1
  13169. vpsrldq xmm2, xmm0, 12
  13170. vpslldq xmm0, xmm0, 4
  13171. vpslldq xmm1, xmm1, 4
  13172. vpor xmm6, xmm6, xmm2
  13173. vpor xmm7, xmm7, xmm0
  13174. vpor xmm6, xmm6, xmm1
  13175. ; ghash_red
  13176. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13177. vpclmulqdq xmm0, xmm7, xmm2, 16
  13178. vpshufd xmm1, xmm7, 78
  13179. vpxor xmm1, xmm1, xmm0
  13180. vpclmulqdq xmm0, xmm1, xmm2, 16
  13181. vpshufd xmm1, xmm1, 78
  13182. vpxor xmm1, xmm1, xmm0
  13183. vpxor xmm6, xmm6, xmm1
  13184. L_AES_GCM_decrypt_avx2_calc_aad_done:
  13185. ; Calculate counter and H
  13186. vpsrlq xmm1, xmm5, 63
  13187. vpsllq xmm0, xmm5, 1
  13188. vpslldq xmm1, xmm1, 8
  13189. vpor xmm0, xmm0, xmm1
  13190. vpshufd xmm5, xmm5, 255
  13191. vpsrad xmm5, xmm5, 31
  13192. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13193. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  13194. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  13195. vpxor xmm5, xmm5, xmm0
  13196. xor ebx, ebx
  13197. cmp r10d, 128
  13198. mov r13d, r10d
  13199. jl L_AES_GCM_decrypt_avx2_done_128
  13200. and r13d, 4294967168
  13201. vmovdqu OWORD PTR [rsp+128], xmm4
  13202. vmovdqu OWORD PTR [rsp+144], xmm15
  13203. vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
  13204. ; H ^ 1 and H ^ 2
  13205. vpclmulqdq xmm9, xmm5, xmm5, 0
  13206. vpclmulqdq xmm10, xmm5, xmm5, 17
  13207. vpclmulqdq xmm8, xmm9, xmm3, 16
  13208. vpshufd xmm9, xmm9, 78
  13209. vpxor xmm9, xmm9, xmm8
  13210. vpclmulqdq xmm8, xmm9, xmm3, 16
  13211. vpshufd xmm9, xmm9, 78
  13212. vpxor xmm9, xmm9, xmm8
  13213. vpxor xmm0, xmm10, xmm9
  13214. vmovdqu OWORD PTR [rsp], xmm5
  13215. vmovdqu OWORD PTR [rsp+16], xmm0
  13216. ; H ^ 3 and H ^ 4
  13217. vpclmulqdq xmm11, xmm0, xmm5, 16
  13218. vpclmulqdq xmm10, xmm0, xmm5, 1
  13219. vpclmulqdq xmm9, xmm0, xmm5, 0
  13220. vpclmulqdq xmm12, xmm0, xmm5, 17
  13221. vpclmulqdq xmm13, xmm0, xmm0, 0
  13222. vpclmulqdq xmm14, xmm0, xmm0, 17
  13223. vpxor xmm11, xmm11, xmm10
  13224. vpslldq xmm10, xmm11, 8
  13225. vpsrldq xmm11, xmm11, 8
  13226. vpxor xmm10, xmm10, xmm9
  13227. vpclmulqdq xmm8, xmm13, xmm3, 16
  13228. vpclmulqdq xmm9, xmm10, xmm3, 16
  13229. vpshufd xmm10, xmm10, 78
  13230. vpshufd xmm13, xmm13, 78
  13231. vpxor xmm10, xmm10, xmm9
  13232. vpxor xmm13, xmm13, xmm8
  13233. vpclmulqdq xmm9, xmm10, xmm3, 16
  13234. vpclmulqdq xmm8, xmm13, xmm3, 16
  13235. vpshufd xmm10, xmm10, 78
  13236. vpshufd xmm13, xmm13, 78
  13237. vpxor xmm12, xmm12, xmm11
  13238. vpxor xmm13, xmm13, xmm8
  13239. vpxor xmm10, xmm10, xmm12
  13240. vpxor xmm2, xmm13, xmm14
  13241. vpxor xmm1, xmm10, xmm9
  13242. vmovdqu OWORD PTR [rsp+32], xmm1
  13243. vmovdqu OWORD PTR [rsp+48], xmm2
  13244. ; H ^ 5 and H ^ 6
  13245. vpclmulqdq xmm11, xmm1, xmm0, 16
  13246. vpclmulqdq xmm10, xmm1, xmm0, 1
  13247. vpclmulqdq xmm9, xmm1, xmm0, 0
  13248. vpclmulqdq xmm12, xmm1, xmm0, 17
  13249. vpclmulqdq xmm13, xmm1, xmm1, 0
  13250. vpclmulqdq xmm14, xmm1, xmm1, 17
  13251. vpxor xmm11, xmm11, xmm10
  13252. vpslldq xmm10, xmm11, 8
  13253. vpsrldq xmm11, xmm11, 8
  13254. vpxor xmm10, xmm10, xmm9
  13255. vpclmulqdq xmm8, xmm13, xmm3, 16
  13256. vpclmulqdq xmm9, xmm10, xmm3, 16
  13257. vpshufd xmm10, xmm10, 78
  13258. vpshufd xmm13, xmm13, 78
  13259. vpxor xmm10, xmm10, xmm9
  13260. vpxor xmm13, xmm13, xmm8
  13261. vpclmulqdq xmm9, xmm10, xmm3, 16
  13262. vpclmulqdq xmm8, xmm13, xmm3, 16
  13263. vpshufd xmm10, xmm10, 78
  13264. vpshufd xmm13, xmm13, 78
  13265. vpxor xmm12, xmm12, xmm11
  13266. vpxor xmm13, xmm13, xmm8
  13267. vpxor xmm10, xmm10, xmm12
  13268. vpxor xmm0, xmm13, xmm14
  13269. vpxor xmm7, xmm10, xmm9
  13270. vmovdqu OWORD PTR [rsp+64], xmm7
  13271. vmovdqu OWORD PTR [rsp+80], xmm0
  13272. ; H ^ 7 and H ^ 8
  13273. vpclmulqdq xmm11, xmm2, xmm1, 16
  13274. vpclmulqdq xmm10, xmm2, xmm1, 1
  13275. vpclmulqdq xmm9, xmm2, xmm1, 0
  13276. vpclmulqdq xmm12, xmm2, xmm1, 17
  13277. vpclmulqdq xmm13, xmm2, xmm2, 0
  13278. vpclmulqdq xmm14, xmm2, xmm2, 17
  13279. vpxor xmm11, xmm11, xmm10
  13280. vpslldq xmm10, xmm11, 8
  13281. vpsrldq xmm11, xmm11, 8
  13282. vpxor xmm10, xmm10, xmm9
  13283. vpclmulqdq xmm8, xmm13, xmm3, 16
  13284. vpclmulqdq xmm9, xmm10, xmm3, 16
  13285. vpshufd xmm10, xmm10, 78
  13286. vpshufd xmm13, xmm13, 78
  13287. vpxor xmm10, xmm10, xmm9
  13288. vpxor xmm13, xmm13, xmm8
  13289. vpclmulqdq xmm9, xmm10, xmm3, 16
  13290. vpclmulqdq xmm8, xmm13, xmm3, 16
  13291. vpshufd xmm10, xmm10, 78
  13292. vpshufd xmm13, xmm13, 78
  13293. vpxor xmm12, xmm12, xmm11
  13294. vpxor xmm13, xmm13, xmm8
  13295. vpxor xmm10, xmm10, xmm12
  13296. vpxor xmm0, xmm13, xmm14
  13297. vpxor xmm7, xmm10, xmm9
  13298. vmovdqu OWORD PTR [rsp+96], xmm7
  13299. vmovdqu OWORD PTR [rsp+112], xmm0
  13300. L_AES_GCM_decrypt_avx2_ghash_128:
  13301. ; aesenc_128_ghash
  13302. lea rcx, QWORD PTR [rdi+rbx]
  13303. lea rdx, QWORD PTR [r8+rbx]
  13304. ; aesenc_ctr
  13305. vmovdqu xmm0, OWORD PTR [rsp+128]
  13306. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13307. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  13308. vpshufb xmm8, xmm0, xmm1
  13309. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  13310. vpshufb xmm9, xmm9, xmm1
  13311. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  13312. vpshufb xmm10, xmm10, xmm1
  13313. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  13314. vpshufb xmm11, xmm11, xmm1
  13315. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  13316. vpshufb xmm12, xmm12, xmm1
  13317. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  13318. vpshufb xmm13, xmm13, xmm1
  13319. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  13320. vpshufb xmm14, xmm14, xmm1
  13321. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  13322. vpshufb xmm15, xmm15, xmm1
  13323. ; aesenc_xor
  13324. vmovdqu xmm7, OWORD PTR [rsi]
  13325. vmovdqu OWORD PTR [rsp+128], xmm0
  13326. vpxor xmm8, xmm8, xmm7
  13327. vpxor xmm9, xmm9, xmm7
  13328. vpxor xmm10, xmm10, xmm7
  13329. vpxor xmm11, xmm11, xmm7
  13330. vpxor xmm12, xmm12, xmm7
  13331. vpxor xmm13, xmm13, xmm7
  13332. vpxor xmm14, xmm14, xmm7
  13333. vpxor xmm15, xmm15, xmm7
  13334. ; aesenc_pclmul_1
  13335. vmovdqu xmm1, OWORD PTR [rcx]
  13336. vmovdqu xmm0, OWORD PTR [rsi+16]
  13337. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13338. vmovdqu xmm2, OWORD PTR [rsp+112]
  13339. vpxor xmm1, xmm1, xmm6
  13340. vpclmulqdq xmm5, xmm1, xmm2, 16
  13341. vpclmulqdq xmm3, xmm1, xmm2, 1
  13342. vpclmulqdq xmm6, xmm1, xmm2, 0
  13343. vpclmulqdq xmm7, xmm1, xmm2, 17
  13344. vaesenc xmm8, xmm8, xmm0
  13345. vaesenc xmm9, xmm9, xmm0
  13346. vaesenc xmm10, xmm10, xmm0
  13347. vaesenc xmm11, xmm11, xmm0
  13348. vaesenc xmm12, xmm12, xmm0
  13349. vaesenc xmm13, xmm13, xmm0
  13350. vaesenc xmm14, xmm14, xmm0
  13351. vaesenc xmm15, xmm15, xmm0
  13352. ; aesenc_pclmul_2
  13353. vmovdqu xmm1, OWORD PTR [rcx+16]
  13354. vmovdqu xmm0, OWORD PTR [rsp+96]
  13355. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13356. vpxor xmm5, xmm5, xmm3
  13357. vpclmulqdq xmm2, xmm1, xmm0, 16
  13358. vpclmulqdq xmm3, xmm1, xmm0, 1
  13359. vpclmulqdq xmm4, xmm1, xmm0, 0
  13360. vpclmulqdq xmm1, xmm1, xmm0, 17
  13361. vmovdqu xmm0, OWORD PTR [rsi+32]
  13362. vpxor xmm7, xmm7, xmm1
  13363. vaesenc xmm8, xmm8, xmm0
  13364. vaesenc xmm9, xmm9, xmm0
  13365. vaesenc xmm10, xmm10, xmm0
  13366. vaesenc xmm11, xmm11, xmm0
  13367. vaesenc xmm12, xmm12, xmm0
  13368. vaesenc xmm13, xmm13, xmm0
  13369. vaesenc xmm14, xmm14, xmm0
  13370. vaesenc xmm15, xmm15, xmm0
  13371. ; aesenc_pclmul_n
  13372. vmovdqu xmm1, OWORD PTR [rcx+32]
  13373. vmovdqu xmm0, OWORD PTR [rsp+80]
  13374. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13375. vpxor xmm5, xmm5, xmm2
  13376. vpclmulqdq xmm2, xmm1, xmm0, 16
  13377. vpxor xmm5, xmm5, xmm3
  13378. vpclmulqdq xmm3, xmm1, xmm0, 1
  13379. vpxor xmm6, xmm6, xmm4
  13380. vpclmulqdq xmm4, xmm1, xmm0, 0
  13381. vpclmulqdq xmm1, xmm1, xmm0, 17
  13382. vmovdqu xmm0, OWORD PTR [rsi+48]
  13383. vpxor xmm7, xmm7, xmm1
  13384. vaesenc xmm8, xmm8, xmm0
  13385. vaesenc xmm9, xmm9, xmm0
  13386. vaesenc xmm10, xmm10, xmm0
  13387. vaesenc xmm11, xmm11, xmm0
  13388. vaesenc xmm12, xmm12, xmm0
  13389. vaesenc xmm13, xmm13, xmm0
  13390. vaesenc xmm14, xmm14, xmm0
  13391. vaesenc xmm15, xmm15, xmm0
  13392. ; aesenc_pclmul_n
  13393. vmovdqu xmm1, OWORD PTR [rcx+48]
  13394. vmovdqu xmm0, OWORD PTR [rsp+64]
  13395. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13396. vpxor xmm5, xmm5, xmm2
  13397. vpclmulqdq xmm2, xmm1, xmm0, 16
  13398. vpxor xmm5, xmm5, xmm3
  13399. vpclmulqdq xmm3, xmm1, xmm0, 1
  13400. vpxor xmm6, xmm6, xmm4
  13401. vpclmulqdq xmm4, xmm1, xmm0, 0
  13402. vpclmulqdq xmm1, xmm1, xmm0, 17
  13403. vmovdqu xmm0, OWORD PTR [rsi+64]
  13404. vpxor xmm7, xmm7, xmm1
  13405. vaesenc xmm8, xmm8, xmm0
  13406. vaesenc xmm9, xmm9, xmm0
  13407. vaesenc xmm10, xmm10, xmm0
  13408. vaesenc xmm11, xmm11, xmm0
  13409. vaesenc xmm12, xmm12, xmm0
  13410. vaesenc xmm13, xmm13, xmm0
  13411. vaesenc xmm14, xmm14, xmm0
  13412. vaesenc xmm15, xmm15, xmm0
  13413. ; aesenc_pclmul_n
  13414. vmovdqu xmm1, OWORD PTR [rcx+64]
  13415. vmovdqu xmm0, OWORD PTR [rsp+48]
  13416. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13417. vpxor xmm5, xmm5, xmm2
  13418. vpclmulqdq xmm2, xmm1, xmm0, 16
  13419. vpxor xmm5, xmm5, xmm3
  13420. vpclmulqdq xmm3, xmm1, xmm0, 1
  13421. vpxor xmm6, xmm6, xmm4
  13422. vpclmulqdq xmm4, xmm1, xmm0, 0
  13423. vpclmulqdq xmm1, xmm1, xmm0, 17
  13424. vmovdqu xmm0, OWORD PTR [rsi+80]
  13425. vpxor xmm7, xmm7, xmm1
  13426. vaesenc xmm8, xmm8, xmm0
  13427. vaesenc xmm9, xmm9, xmm0
  13428. vaesenc xmm10, xmm10, xmm0
  13429. vaesenc xmm11, xmm11, xmm0
  13430. vaesenc xmm12, xmm12, xmm0
  13431. vaesenc xmm13, xmm13, xmm0
  13432. vaesenc xmm14, xmm14, xmm0
  13433. vaesenc xmm15, xmm15, xmm0
  13434. ; aesenc_pclmul_n
  13435. vmovdqu xmm1, OWORD PTR [rcx+80]
  13436. vmovdqu xmm0, OWORD PTR [rsp+32]
  13437. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13438. vpxor xmm5, xmm5, xmm2
  13439. vpclmulqdq xmm2, xmm1, xmm0, 16
  13440. vpxor xmm5, xmm5, xmm3
  13441. vpclmulqdq xmm3, xmm1, xmm0, 1
  13442. vpxor xmm6, xmm6, xmm4
  13443. vpclmulqdq xmm4, xmm1, xmm0, 0
  13444. vpclmulqdq xmm1, xmm1, xmm0, 17
  13445. vmovdqu xmm0, OWORD PTR [rsi+96]
  13446. vpxor xmm7, xmm7, xmm1
  13447. vaesenc xmm8, xmm8, xmm0
  13448. vaesenc xmm9, xmm9, xmm0
  13449. vaesenc xmm10, xmm10, xmm0
  13450. vaesenc xmm11, xmm11, xmm0
  13451. vaesenc xmm12, xmm12, xmm0
  13452. vaesenc xmm13, xmm13, xmm0
  13453. vaesenc xmm14, xmm14, xmm0
  13454. vaesenc xmm15, xmm15, xmm0
  13455. ; aesenc_pclmul_n
  13456. vmovdqu xmm1, OWORD PTR [rcx+96]
  13457. vmovdqu xmm0, OWORD PTR [rsp+16]
  13458. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13459. vpxor xmm5, xmm5, xmm2
  13460. vpclmulqdq xmm2, xmm1, xmm0, 16
  13461. vpxor xmm5, xmm5, xmm3
  13462. vpclmulqdq xmm3, xmm1, xmm0, 1
  13463. vpxor xmm6, xmm6, xmm4
  13464. vpclmulqdq xmm4, xmm1, xmm0, 0
  13465. vpclmulqdq xmm1, xmm1, xmm0, 17
  13466. vmovdqu xmm0, OWORD PTR [rsi+112]
  13467. vpxor xmm7, xmm7, xmm1
  13468. vaesenc xmm8, xmm8, xmm0
  13469. vaesenc xmm9, xmm9, xmm0
  13470. vaesenc xmm10, xmm10, xmm0
  13471. vaesenc xmm11, xmm11, xmm0
  13472. vaesenc xmm12, xmm12, xmm0
  13473. vaesenc xmm13, xmm13, xmm0
  13474. vaesenc xmm14, xmm14, xmm0
  13475. vaesenc xmm15, xmm15, xmm0
  13476. ; aesenc_pclmul_n
  13477. vmovdqu xmm1, OWORD PTR [rcx+112]
  13478. vmovdqu xmm0, OWORD PTR [rsp]
  13479. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13480. vpxor xmm5, xmm5, xmm2
  13481. vpclmulqdq xmm2, xmm1, xmm0, 16
  13482. vpxor xmm5, xmm5, xmm3
  13483. vpclmulqdq xmm3, xmm1, xmm0, 1
  13484. vpxor xmm6, xmm6, xmm4
  13485. vpclmulqdq xmm4, xmm1, xmm0, 0
  13486. vpclmulqdq xmm1, xmm1, xmm0, 17
  13487. vmovdqu xmm0, OWORD PTR [rsi+128]
  13488. vpxor xmm7, xmm7, xmm1
  13489. vaesenc xmm8, xmm8, xmm0
  13490. vaesenc xmm9, xmm9, xmm0
  13491. vaesenc xmm10, xmm10, xmm0
  13492. vaesenc xmm11, xmm11, xmm0
  13493. vaesenc xmm12, xmm12, xmm0
  13494. vaesenc xmm13, xmm13, xmm0
  13495. vaesenc xmm14, xmm14, xmm0
  13496. vaesenc xmm15, xmm15, xmm0
  13497. ; aesenc_pclmul_l
  13498. vpxor xmm5, xmm5, xmm2
  13499. vpxor xmm6, xmm6, xmm4
  13500. vpxor xmm5, xmm5, xmm3
  13501. vpslldq xmm1, xmm5, 8
  13502. vpsrldq xmm5, xmm5, 8
  13503. vmovdqu xmm4, OWORD PTR [rsi+144]
  13504. vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
  13505. vaesenc xmm8, xmm8, xmm4
  13506. vpxor xmm6, xmm6, xmm1
  13507. vpxor xmm7, xmm7, xmm5
  13508. vpclmulqdq xmm3, xmm6, xmm0, 16
  13509. vaesenc xmm9, xmm9, xmm4
  13510. vaesenc xmm10, xmm10, xmm4
  13511. vaesenc xmm11, xmm11, xmm4
  13512. vpshufd xmm6, xmm6, 78
  13513. vpxor xmm6, xmm6, xmm3
  13514. vpclmulqdq xmm3, xmm6, xmm0, 16
  13515. vaesenc xmm12, xmm12, xmm4
  13516. vaesenc xmm13, xmm13, xmm4
  13517. vaesenc xmm14, xmm14, xmm4
  13518. vpshufd xmm6, xmm6, 78
  13519. vpxor xmm6, xmm6, xmm3
  13520. vpxor xmm6, xmm6, xmm7
  13521. vaesenc xmm15, xmm15, xmm4
  13522. cmp r9d, 11
  13523. vmovdqu xmm7, OWORD PTR [rsi+160]
  13524. jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
  13525. vaesenc xmm8, xmm8, xmm7
  13526. vaesenc xmm9, xmm9, xmm7
  13527. vaesenc xmm10, xmm10, xmm7
  13528. vaesenc xmm11, xmm11, xmm7
  13529. vaesenc xmm12, xmm12, xmm7
  13530. vaesenc xmm13, xmm13, xmm7
  13531. vaesenc xmm14, xmm14, xmm7
  13532. vaesenc xmm15, xmm15, xmm7
  13533. vmovdqu xmm7, OWORD PTR [rsi+176]
  13534. vaesenc xmm8, xmm8, xmm7
  13535. vaesenc xmm9, xmm9, xmm7
  13536. vaesenc xmm10, xmm10, xmm7
  13537. vaesenc xmm11, xmm11, xmm7
  13538. vaesenc xmm12, xmm12, xmm7
  13539. vaesenc xmm13, xmm13, xmm7
  13540. vaesenc xmm14, xmm14, xmm7
  13541. vaesenc xmm15, xmm15, xmm7
  13542. cmp r9d, 13
  13543. vmovdqu xmm7, OWORD PTR [rsi+192]
  13544. jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
  13545. vaesenc xmm8, xmm8, xmm7
  13546. vaesenc xmm9, xmm9, xmm7
  13547. vaesenc xmm10, xmm10, xmm7
  13548. vaesenc xmm11, xmm11, xmm7
  13549. vaesenc xmm12, xmm12, xmm7
  13550. vaesenc xmm13, xmm13, xmm7
  13551. vaesenc xmm14, xmm14, xmm7
  13552. vaesenc xmm15, xmm15, xmm7
  13553. vmovdqu xmm7, OWORD PTR [rsi+208]
  13554. vaesenc xmm8, xmm8, xmm7
  13555. vaesenc xmm9, xmm9, xmm7
  13556. vaesenc xmm10, xmm10, xmm7
  13557. vaesenc xmm11, xmm11, xmm7
  13558. vaesenc xmm12, xmm12, xmm7
  13559. vaesenc xmm13, xmm13, xmm7
  13560. vaesenc xmm14, xmm14, xmm7
  13561. vaesenc xmm15, xmm15, xmm7
  13562. vmovdqu xmm7, OWORD PTR [rsi+224]
  13563. L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done:
  13564. ; aesenc_last
  13565. vaesenclast xmm8, xmm8, xmm7
  13566. vaesenclast xmm9, xmm9, xmm7
  13567. vaesenclast xmm10, xmm10, xmm7
  13568. vaesenclast xmm11, xmm11, xmm7
  13569. vmovdqu xmm0, OWORD PTR [rcx]
  13570. vmovdqu xmm1, OWORD PTR [rcx+16]
  13571. vmovdqu xmm2, OWORD PTR [rcx+32]
  13572. vmovdqu xmm3, OWORD PTR [rcx+48]
  13573. vpxor xmm8, xmm8, xmm0
  13574. vpxor xmm9, xmm9, xmm1
  13575. vpxor xmm10, xmm10, xmm2
  13576. vpxor xmm11, xmm11, xmm3
  13577. vmovdqu OWORD PTR [rdx], xmm8
  13578. vmovdqu OWORD PTR [rdx+16], xmm9
  13579. vmovdqu OWORD PTR [rdx+32], xmm10
  13580. vmovdqu OWORD PTR [rdx+48], xmm11
  13581. vaesenclast xmm12, xmm12, xmm7
  13582. vaesenclast xmm13, xmm13, xmm7
  13583. vaesenclast xmm14, xmm14, xmm7
  13584. vaesenclast xmm15, xmm15, xmm7
  13585. vmovdqu xmm0, OWORD PTR [rcx+64]
  13586. vmovdqu xmm1, OWORD PTR [rcx+80]
  13587. vmovdqu xmm2, OWORD PTR [rcx+96]
  13588. vmovdqu xmm3, OWORD PTR [rcx+112]
  13589. vpxor xmm12, xmm12, xmm0
  13590. vpxor xmm13, xmm13, xmm1
  13591. vpxor xmm14, xmm14, xmm2
  13592. vpxor xmm15, xmm15, xmm3
  13593. vmovdqu OWORD PTR [rdx+64], xmm12
  13594. vmovdqu OWORD PTR [rdx+80], xmm13
  13595. vmovdqu OWORD PTR [rdx+96], xmm14
  13596. vmovdqu OWORD PTR [rdx+112], xmm15
  13597. ; aesenc_128_ghash - end
  13598. add ebx, 128
  13599. cmp ebx, r13d
  13600. jl L_AES_GCM_decrypt_avx2_ghash_128
  13601. vmovdqu xmm5, OWORD PTR [rsp]
  13602. vmovdqu xmm4, OWORD PTR [rsp+128]
  13603. vmovdqu xmm15, OWORD PTR [rsp+144]
  13604. L_AES_GCM_decrypt_avx2_done_128:
  13605. cmp ebx, r10d
  13606. jge L_AES_GCM_decrypt_avx2_done_dec
  13607. mov r13d, r10d
  13608. and r13d, 4294967280
  13609. cmp ebx, r13d
  13610. jge L_AES_GCM_decrypt_avx2_last_block_done
  13611. L_AES_GCM_decrypt_avx2_last_block_start:
  13612. vmovdqu xmm11, OWORD PTR [rdi+rbx]
  13613. vpshufb xmm10, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13614. vpshufb xmm12, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13615. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  13616. vpxor xmm12, xmm12, xmm6
  13617. ; aesenc_gfmul_sb
  13618. vpclmulqdq xmm2, xmm12, xmm5, 1
  13619. vpclmulqdq xmm3, xmm12, xmm5, 16
  13620. vpclmulqdq xmm1, xmm12, xmm5, 0
  13621. vpclmulqdq xmm8, xmm12, xmm5, 17
  13622. vpxor xmm10, xmm10, [rsi]
  13623. vaesenc xmm10, xmm10, [rsi+16]
  13624. vpxor xmm3, xmm3, xmm2
  13625. vpslldq xmm2, xmm3, 8
  13626. vpsrldq xmm3, xmm3, 8
  13627. vaesenc xmm10, xmm10, [rsi+32]
  13628. vpxor xmm2, xmm2, xmm1
  13629. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13630. vaesenc xmm10, xmm10, [rsi+48]
  13631. vaesenc xmm10, xmm10, [rsi+64]
  13632. vaesenc xmm10, xmm10, [rsi+80]
  13633. vpshufd xmm2, xmm2, 78
  13634. vpxor xmm2, xmm2, xmm1
  13635. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13636. vaesenc xmm10, xmm10, [rsi+96]
  13637. vaesenc xmm10, xmm10, [rsi+112]
  13638. vaesenc xmm10, xmm10, [rsi+128]
  13639. vpshufd xmm2, xmm2, 78
  13640. vaesenc xmm10, xmm10, [rsi+144]
  13641. vpxor xmm8, xmm8, xmm3
  13642. vpxor xmm2, xmm2, xmm8
  13643. vmovdqu xmm0, OWORD PTR [rsi+160]
  13644. cmp r9d, 11
  13645. jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
  13646. vaesenc xmm10, xmm10, xmm0
  13647. vaesenc xmm10, xmm10, [rsi+176]
  13648. vmovdqu xmm0, OWORD PTR [rsi+192]
  13649. cmp r9d, 13
  13650. jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
  13651. vaesenc xmm10, xmm10, xmm0
  13652. vaesenc xmm10, xmm10, [rsi+208]
  13653. vmovdqu xmm0, OWORD PTR [rsi+224]
  13654. L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last:
  13655. vaesenclast xmm10, xmm10, xmm0
  13656. vpxor xmm6, xmm2, xmm1
  13657. vpxor xmm10, xmm10, xmm11
  13658. vmovdqu OWORD PTR [r8+rbx], xmm10
  13659. add ebx, 16
  13660. cmp ebx, r13d
  13661. jl L_AES_GCM_decrypt_avx2_last_block_start
  13662. L_AES_GCM_decrypt_avx2_last_block_done:
  13663. mov ecx, r10d
  13664. mov edx, r10d
  13665. and ecx, 15
  13666. jz L_AES_GCM_decrypt_avx2_done_dec
  13667. ; aesenc_last15_dec
  13668. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13669. vpxor xmm4, xmm4, [rsi]
  13670. vaesenc xmm4, xmm4, [rsi+16]
  13671. vaesenc xmm4, xmm4, [rsi+32]
  13672. vaesenc xmm4, xmm4, [rsi+48]
  13673. vaesenc xmm4, xmm4, [rsi+64]
  13674. vaesenc xmm4, xmm4, [rsi+80]
  13675. vaesenc xmm4, xmm4, [rsi+96]
  13676. vaesenc xmm4, xmm4, [rsi+112]
  13677. vaesenc xmm4, xmm4, [rsi+128]
  13678. vaesenc xmm4, xmm4, [rsi+144]
  13679. cmp r9d, 11
  13680. vmovdqu xmm1, OWORD PTR [rsi+160]
  13681. jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
  13682. vaesenc xmm4, xmm4, xmm1
  13683. vaesenc xmm4, xmm4, [rsi+176]
  13684. cmp r9d, 13
  13685. vmovdqu xmm1, OWORD PTR [rsi+192]
  13686. jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
  13687. vaesenc xmm4, xmm4, xmm1
  13688. vaesenc xmm4, xmm4, [rsi+208]
  13689. vmovdqu xmm1, OWORD PTR [rsi+224]
  13690. L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last:
  13691. vaesenclast xmm4, xmm4, xmm1
  13692. xor ecx, ecx
  13693. vpxor xmm0, xmm0, xmm0
  13694. vmovdqu OWORD PTR [rsp], xmm4
  13695. vmovdqu OWORD PTR [rsp+16], xmm0
  13696. L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop:
  13697. movzx r13d, BYTE PTR [rdi+rbx]
  13698. mov BYTE PTR [rsp+rcx+16], r13b
  13699. xor r13b, BYTE PTR [rsp+rcx]
  13700. mov BYTE PTR [r8+rbx], r13b
  13701. inc ebx
  13702. inc ecx
  13703. cmp ebx, edx
  13704. jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop
  13705. vmovdqu xmm4, OWORD PTR [rsp+16]
  13706. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13707. vpxor xmm6, xmm6, xmm4
  13708. ; ghash_gfmul_red
  13709. vpclmulqdq xmm2, xmm6, xmm5, 16
  13710. vpclmulqdq xmm1, xmm6, xmm5, 1
  13711. vpclmulqdq xmm0, xmm6, xmm5, 0
  13712. vpxor xmm2, xmm2, xmm1
  13713. vpslldq xmm1, xmm2, 8
  13714. vpsrldq xmm2, xmm2, 8
  13715. vpxor xmm1, xmm1, xmm0
  13716. vpclmulqdq xmm6, xmm6, xmm5, 17
  13717. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13718. vpshufd xmm1, xmm1, 78
  13719. vpxor xmm1, xmm1, xmm0
  13720. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13721. vpshufd xmm1, xmm1, 78
  13722. vpxor xmm6, xmm6, xmm2
  13723. vpxor xmm6, xmm6, xmm1
  13724. vpxor xmm6, xmm6, xmm0
  13725. L_AES_GCM_decrypt_avx2_done_dec:
  13726. ; calc_tag
  13727. shl r10, 3
  13728. shl r11, 3
  13729. vmovq xmm0, r10
  13730. vmovq xmm1, r11
  13731. vpunpcklqdq xmm0, xmm0, xmm1
  13732. vpxor xmm0, xmm0, xmm6
  13733. ; ghash_gfmul_red
  13734. vpclmulqdq xmm4, xmm0, xmm5, 16
  13735. vpclmulqdq xmm3, xmm0, xmm5, 1
  13736. vpclmulqdq xmm2, xmm0, xmm5, 0
  13737. vpxor xmm4, xmm4, xmm3
  13738. vpslldq xmm3, xmm4, 8
  13739. vpsrldq xmm4, xmm4, 8
  13740. vpxor xmm3, xmm3, xmm2
  13741. vpclmulqdq xmm0, xmm0, xmm5, 17
  13742. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13743. vpshufd xmm3, xmm3, 78
  13744. vpxor xmm3, xmm3, xmm2
  13745. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13746. vpshufd xmm3, xmm3, 78
  13747. vpxor xmm0, xmm0, xmm4
  13748. vpxor xmm0, xmm0, xmm3
  13749. vpxor xmm0, xmm0, xmm2
  13750. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13751. vpxor xmm0, xmm0, xmm15
  13752. ; cmp_tag
  13753. cmp r15d, 16
  13754. je L_AES_GCM_decrypt_avx2_cmp_tag_16
  13755. xor rdx, rdx
  13756. xor rax, rax
  13757. vmovdqu OWORD PTR [rsp], xmm0
  13758. L_AES_GCM_decrypt_avx2_cmp_tag_loop:
  13759. movzx r13d, BYTE PTR [rsp+rdx]
  13760. xor r13b, BYTE PTR [r14+rdx]
  13761. or al, r13b
  13762. inc edx
  13763. cmp edx, r15d
  13764. jne L_AES_GCM_decrypt_avx2_cmp_tag_loop
  13765. cmp rax, 0
  13766. sete al
  13767. jmp L_AES_GCM_decrypt_avx2_cmp_tag_done
  13768. L_AES_GCM_decrypt_avx2_cmp_tag_16:
  13769. vmovdqu xmm1, OWORD PTR [r14]
  13770. vpcmpeqb xmm0, xmm0, xmm1
  13771. vpmovmskb rdx, xmm0
  13772. ; %%edx == 0xFFFF then return 1 else => return 0
  13773. xor eax, eax
  13774. cmp edx, 65535
  13775. sete al
  13776. L_AES_GCM_decrypt_avx2_cmp_tag_done:
  13777. mov DWORD PTR [rbp], eax
  13778. vzeroupper
  13779. vmovdqu xmm6, OWORD PTR [rsp+168]
  13780. vmovdqu xmm7, OWORD PTR [rsp+184]
  13781. vmovdqu xmm8, OWORD PTR [rsp+200]
  13782. vmovdqu xmm9, OWORD PTR [rsp+216]
  13783. vmovdqu xmm10, OWORD PTR [rsp+232]
  13784. vmovdqu xmm11, OWORD PTR [rsp+248]
  13785. vmovdqu xmm12, OWORD PTR [rsp+264]
  13786. vmovdqu xmm13, OWORD PTR [rsp+280]
  13787. vmovdqu xmm14, OWORD PTR [rsp+296]
  13788. vmovdqu xmm15, OWORD PTR [rsp+312]
  13789. add rsp, 328
  13790. pop rbp
  13791. pop rsi
  13792. pop r15
  13793. pop rbx
  13794. pop r14
  13795. pop r12
  13796. pop rdi
  13797. pop r13
  13798. ret
  13799. AES_GCM_decrypt_avx2 ENDP
  13800. _text ENDS
  13801. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_init_avx2
  ; ABI:  Microsoft x64 (MASM syntax).
  ; In:   rcx      = pointer to the expanded AES round keys (16 bytes each)
  ;       edx      = round count; compared with 11 and 13 to choose between
  ;                  10, 12 and 14 rounds
  ;       r8       = pointer to the IV bytes
  ;       r9d      = IV length in bytes
  ;       5th arg  = pointer that receives H (16 bytes, byte-reflected)
  ;       6th arg  = pointer that receives the working counter (16 bytes)
  ;       7th arg  = pointer that receives the encrypted initial counter
  ; Out:  the three 16-byte destinations above are written; no return value
  ;       is set in rax (rax holds the 5th argument on exit).
  ; Saved: rbx, rdi, rsi, r12, xmm6, xmm7 (callee-saved in this ABI).
  ; Stack: 48 bytes of locals - [rsp] is a 16-byte scratch block used to
  ;        zero-pad a trailing partial IV block, [rsp+16]/[rsp+32] hold the
  ;        saved xmm6/xmm7.
  ;
  ; A 12-byte IV takes the fast path (counter = IV || constant word); any
  ; other length is run through GHASH together with its bit length.
  ; ---------------------------------------------------------------------
  AES_GCM_init_avx2 PROC
          push rbx
          push rdi
          push rsi
          push r12
          mov rdi, rcx
          mov rsi, rdx
          mov r10, r8
          mov r11d, r9d
          ; Four pushes (32) + return address (8) + shadow space (32) = 72,
          ; so the 5th, 6th and 7th arguments sit at rsp+72/80/88.
          mov rax, QWORD PTR [rsp+72]
          mov r8, QWORD PTR [rsp+80]
          mov r9, QWORD PTR [rsp+88]
          sub rsp, 48
          vmovdqu OWORD PTR [rsp+16], xmm6
          vmovdqu OWORD PTR [rsp+32], xmm7
          vpxor xmm4, xmm4, xmm4
          mov edx, r11d
          cmp edx, 12
          je L_AES_GCM_init_avx2_iv_12
          ; Calculate values when IV is not 12 bytes
          ; H = Encrypt X(=0)
          ; Round key 0 is loaded directly: 0 XOR rk0 == rk0.
          vmovdqu xmm5, OWORD PTR [rdi]
          vaesenc xmm5, xmm5, [rdi+16]
          vaesenc xmm5, xmm5, [rdi+32]
          vaesenc xmm5, xmm5, [rdi+48]
          vaesenc xmm5, xmm5, [rdi+64]
          vaesenc xmm5, xmm5, [rdi+80]
          vaesenc xmm5, xmm5, [rdi+96]
          vaesenc xmm5, xmm5, [rdi+112]
          vaesenc xmm5, xmm5, [rdi+128]
          vaesenc xmm5, xmm5, [rdi+144]
          ; The load between cmp and jl does not alter flags; xmm0 always
          ; holds the key for the final round when the branch is taken.
          cmp esi, 11
          vmovdqu xmm0, OWORD PTR [rdi+160]
          jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm5, xmm5, [rdi+176]
          cmp esi, 13
          vmovdqu xmm0, OWORD PTR [rdi+192]
          jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm5, xmm5, [rdi+208]
          vmovdqu xmm0, OWORD PTR [rdi+224]
  L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last:
          vaesenclast xmm5, xmm5, xmm0
          vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
          ; Calc counter
          ; Initialization vector
          ; mov (not xor) clears rcx so the flags from cmp survive to je.
          cmp edx, 0
          mov rcx, 0
          je L_AES_GCM_init_avx2_calc_iv_done
          cmp edx, 16
          jl L_AES_GCM_init_avx2_calc_iv_lt16
          ; edx = IV length rounded down to a multiple of 16.
          and edx, 4294967280
  L_AES_GCM_init_avx2_calc_iv_16_loop:
          ; Fold one full 16-byte IV block into the GHASH state in xmm4.
          vmovdqu xmm0, OWORD PTR [r10+rcx]
          vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
          vpxor xmm4, xmm4, xmm0
          ; ghash_gfmul_avx
          vpclmulqdq xmm2, xmm5, xmm4, 16
          vpclmulqdq xmm1, xmm5, xmm4, 1
          vpclmulqdq xmm0, xmm5, xmm4, 0
          vpclmulqdq xmm3, xmm5, xmm4, 17
          vpxor xmm2, xmm2, xmm1
          vpslldq xmm1, xmm2, 8
          vpsrldq xmm2, xmm2, 8
          vpxor xmm6, xmm0, xmm1
          vpxor xmm4, xmm3, xmm2
          ; ghash_mid
          ; Shift the 256-bit product (xmm4:xmm6) left by one bit.
          vpsrld xmm0, xmm6, 31
          vpsrld xmm1, xmm4, 31
          vpslld xmm6, xmm6, 1
          vpslld xmm4, xmm4, 1
          vpsrldq xmm2, xmm0, 12
          vpslldq xmm0, xmm0, 4
          vpslldq xmm1, xmm1, 4
          vpor xmm4, xmm4, xmm2
          vpor xmm6, xmm6, xmm0
          vpor xmm4, xmm4, xmm1
          ; ghash_red
          vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
          vpclmulqdq xmm0, xmm6, xmm2, 16
          vpshufd xmm1, xmm6, 78
          vpxor xmm1, xmm1, xmm0
          vpclmulqdq xmm0, xmm1, xmm2, 16
          vpshufd xmm1, xmm1, 78
          vpxor xmm1, xmm1, xmm0
          vpxor xmm4, xmm4, xmm1
          add ecx, 16
          cmp ecx, edx
          jl L_AES_GCM_init_avx2_calc_iv_16_loop
          ; Restore the true IV length; finished if it was a multiple of 16.
          mov edx, r11d
          cmp ecx, edx
          je L_AES_GCM_init_avx2_calc_iv_done
  L_AES_GCM_init_avx2_calc_iv_lt16:
          ; Copy the remaining (<16) IV bytes into a zeroed stack block.
          vpxor xmm0, xmm0, xmm0
          xor ebx, ebx
          vmovdqu OWORD PTR [rsp], xmm0
  L_AES_GCM_init_avx2_calc_iv_loop:
          movzx r12d, BYTE PTR [r10+rcx]
          mov BYTE PTR [rsp+rbx], r12b
          inc ecx
          inc ebx
          cmp ecx, edx
          jl L_AES_GCM_init_avx2_calc_iv_loop
          vmovdqu xmm0, OWORD PTR [rsp]
          vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
          vpxor xmm4, xmm4, xmm0
          ; ghash_gfmul_avx
          vpclmulqdq xmm2, xmm5, xmm4, 16
          vpclmulqdq xmm1, xmm5, xmm4, 1
          vpclmulqdq xmm0, xmm5, xmm4, 0
          vpclmulqdq xmm3, xmm5, xmm4, 17
          vpxor xmm2, xmm2, xmm1
          vpslldq xmm1, xmm2, 8
          vpsrldq xmm2, xmm2, 8
          vpxor xmm6, xmm0, xmm1
          vpxor xmm4, xmm3, xmm2
          ; ghash_mid
          vpsrld xmm0, xmm6, 31
          vpsrld xmm1, xmm4, 31
          vpslld xmm6, xmm6, 1
          vpslld xmm4, xmm4, 1
          vpsrldq xmm2, xmm0, 12
          vpslldq xmm0, xmm0, 4
          vpslldq xmm1, xmm1, 4
          vpor xmm4, xmm4, xmm2
          vpor xmm6, xmm6, xmm0
          vpor xmm4, xmm4, xmm1
          ; ghash_red
          vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
          vpclmulqdq xmm0, xmm6, xmm2, 16
          vpshufd xmm1, xmm6, 78
          vpxor xmm1, xmm1, xmm0
          vpclmulqdq xmm0, xmm1, xmm2, 16
          vpshufd xmm1, xmm1, 78
          vpxor xmm1, xmm1, xmm0
          vpxor xmm4, xmm4, xmm1
  L_AES_GCM_init_avx2_calc_iv_done:
          ; T = Encrypt counter
          ; Fold the IV length in bits (bytes * 8) into the hash as the
          ; final block.
          vpxor xmm0, xmm0, xmm0
          shl edx, 3
          vmovq xmm0, rdx
          vpxor xmm4, xmm4, xmm0
          ; ghash_gfmul_avx
          vpclmulqdq xmm2, xmm5, xmm4, 16
          vpclmulqdq xmm1, xmm5, xmm4, 1
          vpclmulqdq xmm0, xmm5, xmm4, 0
          vpclmulqdq xmm3, xmm5, xmm4, 17
          vpxor xmm2, xmm2, xmm1
          vpslldq xmm1, xmm2, 8
          vpsrldq xmm2, xmm2, 8
          vpxor xmm6, xmm0, xmm1
          vpxor xmm4, xmm3, xmm2
          ; ghash_mid
          vpsrld xmm0, xmm6, 31
          vpsrld xmm1, xmm4, 31
          vpslld xmm6, xmm6, 1
          vpslld xmm4, xmm4, 1
          vpsrldq xmm2, xmm0, 12
          vpslldq xmm0, xmm0, 4
          vpslldq xmm1, xmm1, 4
          vpor xmm4, xmm4, xmm2
          vpor xmm6, xmm6, xmm0
          vpor xmm4, xmm4, xmm1
          ; ghash_red
          vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
          vpclmulqdq xmm0, xmm6, xmm2, 16
          vpshufd xmm1, xmm6, 78
          vpxor xmm1, xmm1, xmm0
          vpclmulqdq xmm0, xmm1, xmm2, 16
          vpshufd xmm1, xmm1, 78
          vpxor xmm1, xmm1, xmm0
          vpxor xmm4, xmm4, xmm1
          vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
          ; Encrypt counter
          vmovdqu xmm7, OWORD PTR [rdi]
          vpxor xmm7, xmm7, xmm4
          vaesenc xmm7, xmm7, [rdi+16]
          vaesenc xmm7, xmm7, [rdi+32]
          vaesenc xmm7, xmm7, [rdi+48]
          vaesenc xmm7, xmm7, [rdi+64]
          vaesenc xmm7, xmm7, [rdi+80]
          vaesenc xmm7, xmm7, [rdi+96]
          vaesenc xmm7, xmm7, [rdi+112]
          vaesenc xmm7, xmm7, [rdi+128]
          vaesenc xmm7, xmm7, [rdi+144]
          cmp esi, 11
          vmovdqu xmm0, OWORD PTR [rdi+160]
          jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last
          vaesenc xmm7, xmm7, xmm0
          vaesenc xmm7, xmm7, [rdi+176]
          cmp esi, 13
          vmovdqu xmm0, OWORD PTR [rdi+192]
          jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last
          vaesenc xmm7, xmm7, xmm0
          vaesenc xmm7, xmm7, [rdi+208]
          vmovdqu xmm0, OWORD PTR [rdi+224]
  L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last:
          vaesenclast xmm7, xmm7, xmm0
          jmp L_AES_GCM_init_avx2_iv_done
  L_AES_GCM_init_avx2_iv_12:
          ; # Calculate values when IV is 12 bytes
          ; Set counter based on IV
          vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
          vmovdqu xmm5, OWORD PTR [rdi]
          ; Load exactly the 12 IV bytes (8 + 4). A 128-bit memory operand on
          ; vpblendd would read 16 bytes regardless of the blend mask and so
          ; run 4 bytes past the end of the IV buffer. xmm0 is free here; it
          ; is reloaded with a round key before its next use.
          vmovq xmm0, QWORD PTR [r10]
          vpinsrd xmm0, xmm0, DWORD PTR [r10+8], 2
          ; Low three dwords come from the IV, the top dword stays as loaded
          ; from L_avx2_aes_gcm_bswap_one.
          vpblendd xmm4, xmm4, xmm0, 7
          ; H = Encrypt X(=0) and T = Encrypt counter
          ; The two encryptions run side by side: xmm5 -> H, xmm7 -> T.
          vmovdqu xmm6, OWORD PTR [rdi+16]
          vpxor xmm7, xmm4, xmm5
          vaesenc xmm5, xmm5, xmm6
          vaesenc xmm7, xmm7, xmm6
          vmovdqu xmm0, OWORD PTR [rdi+32]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+48]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+64]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+80]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+96]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+112]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+128]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+144]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          cmp esi, 11
          vmovdqu xmm0, OWORD PTR [rdi+160]
          jl L_AES_GCM_init_avx2_calc_iv_12_last
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+176]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          cmp esi, 13
          vmovdqu xmm0, OWORD PTR [rdi+192]
          jl L_AES_GCM_init_avx2_calc_iv_12_last
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+208]
          vaesenc xmm5, xmm5, xmm0
          vaesenc xmm7, xmm7, xmm0
          vmovdqu xmm0, OWORD PTR [rdi+224]
  L_AES_GCM_init_avx2_calc_iv_12_last:
          vaesenclast xmm5, xmm5, xmm0
          vaesenclast xmm7, xmm7, xmm0
          vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  L_AES_GCM_init_avx2_iv_done:
          ; Publish results: encrypted initial counter, H, and the counter
          ; advanced by one for the first data block.
          vmovdqu OWORD PTR [r9], xmm7
          vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
          vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
          vmovdqu OWORD PTR [rax], xmm5
          vmovdqu OWORD PTR [r8], xmm4
          vzeroupper
          vmovdqu xmm6, OWORD PTR [rsp+16]
          vmovdqu xmm7, OWORD PTR [rsp+32]
          add rsp, 48
          pop r12
          pop rsi
          pop rdi
          pop rbx
          ret
  AES_GCM_init_avx2 ENDP
  14074. _text ENDS
  14075. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_aad_update_avx2
  ; ABI:  Microsoft x64 (MASM syntax).
  ; In:   rcx = pointer to the input bytes to hash
  ;       edx = number of input bytes; consumed 16 at a time, so it is
  ;             presumably always a multiple of 16 - confirm against callers
  ;       r8  = pointer to the 16-byte running GHASH value (read and updated)
  ;       r9  = pointer to the 16-byte hash key used as the multiplicand
  ; Out:  [r8] holds the updated GHASH value.
  ; Saved: xmm6 (callee-saved in this ABI), spilled to a 16-byte stack slot.
  ;
  ; A zero length returns immediately with [r8] untouched. Without that
  ; guard the do-while shaped loop would read and hash 16 bytes that the
  ; caller never supplied.
  ; ---------------------------------------------------------------------
  AES_GCM_aad_update_avx2 PROC
          test edx, edx
          jz L_AES_GCM_aad_update_avx2_done
          mov rax, rcx
          sub rsp, 16
          vmovdqu OWORD PTR [rsp], xmm6
          vmovdqu xmm4, OWORD PTR [r8]
          vmovdqu xmm5, OWORD PTR [r9]
          ; ecx = byte offset of the block being processed.
          xor ecx, ecx
  L_AES_GCM_aad_update_avx2_16_loop:
          vmovdqu xmm0, OWORD PTR [rax+rcx]
          vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
          vpxor xmm4, xmm4, xmm0
          ; ghash_gfmul_avx
          vpclmulqdq xmm2, xmm5, xmm4, 16
          vpclmulqdq xmm1, xmm5, xmm4, 1
          vpclmulqdq xmm0, xmm5, xmm4, 0
          vpclmulqdq xmm3, xmm5, xmm4, 17
          vpxor xmm2, xmm2, xmm1
          vpslldq xmm1, xmm2, 8
          vpsrldq xmm2, xmm2, 8
          vpxor xmm6, xmm0, xmm1
          vpxor xmm4, xmm3, xmm2
          ; ghash_mid
          ; Shift the 256-bit product (xmm4:xmm6) left by one bit.
          vpsrld xmm0, xmm6, 31
          vpsrld xmm1, xmm4, 31
          vpslld xmm6, xmm6, 1
          vpslld xmm4, xmm4, 1
          vpsrldq xmm2, xmm0, 12
          vpslldq xmm0, xmm0, 4
          vpslldq xmm1, xmm1, 4
          vpor xmm4, xmm4, xmm2
          vpor xmm6, xmm6, xmm0
          vpor xmm4, xmm4, xmm1
          ; ghash_red
          vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
          vpclmulqdq xmm0, xmm6, xmm2, 16
          vpshufd xmm1, xmm6, 78
          vpxor xmm1, xmm1, xmm0
          vpclmulqdq xmm0, xmm1, xmm2, 16
          vpshufd xmm1, xmm1, 78
          vpxor xmm1, xmm1, xmm0
          vpxor xmm4, xmm4, xmm1
          add ecx, 16
          cmp ecx, edx
          jl L_AES_GCM_aad_update_avx2_16_loop
          vmovdqu OWORD PTR [r8], xmm4
          vzeroupper
          vmovdqu xmm6, OWORD PTR [rsp]
          add rsp, 16
  L_AES_GCM_aad_update_avx2_done:
          ret
  AES_GCM_aad_update_avx2 ENDP
  14126. _text ENDS
  14127. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_encrypt_block_avx2
  ; ABI:  Microsoft x64 (MASM syntax). Leaf routine: it uses only volatile
  ;       registers (rax, r10, r11, xmm0-xmm3) and leaves RSP unchanged.
  ; In:   rcx     = pointer to the expanded AES round keys (16 bytes each)
  ;       edx     = round count; compared with 11 and 13 to choose between
  ;                 10, 12 and 14 rounds
  ;       r8      = pointer to the 16-byte output block
  ;       r9      = pointer to the 16-byte input block
  ;       5th arg = pointer to the 16-byte counter (read, then advanced by
  ;                 L_avx2_aes_gcm_one and written back)
  ; Out:  [r8] = AES(counter shuffled by L_avx2_aes_gcm_bswap_epi64) XOR [r9]
  ;
  ; The earlier version reserved 152 bytes of stack it never touched; that
  ; allocation has been dropped so the routine no longer modifies RSP.
  ; ---------------------------------------------------------------------
  AES_GCM_encrypt_block_avx2 PROC
          mov r10, r8
          mov r11, r9
          ; No pushes here: return address (8) + shadow space (32) = 40.
          mov rax, QWORD PTR [rsp+40]
          vmovdqu xmm3, OWORD PTR [rax]
          ; aesenc_block
          ; xmm0 = counter block to encrypt, xmm1 = counter + 1 for later.
          vmovdqu xmm1, xmm3
          vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
          vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
          vpxor xmm0, xmm0, [rcx]
          vmovdqu xmm2, OWORD PTR [rcx+16]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+32]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+48]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+64]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+80]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+96]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+112]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+128]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm2, OWORD PTR [rcx+144]
          vaesenc xmm0, xmm0, xmm2
          ; Park the advanced counter in xmm3; xmm1 is reused for round keys.
          vmovdqu xmm3, xmm1
          ; The load between cmp and jl does not alter flags; xmm1 always
          ; holds the key for the final round when the branch is taken.
          cmp edx, 11
          vmovdqu xmm1, OWORD PTR [rcx+160]
          jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last
          vaesenc xmm0, xmm0, xmm1
          vmovdqu xmm2, OWORD PTR [rcx+176]
          vaesenc xmm0, xmm0, xmm2
          cmp edx, 13
          vmovdqu xmm1, OWORD PTR [rcx+192]
          jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last
          vaesenc xmm0, xmm0, xmm1
          vmovdqu xmm2, OWORD PTR [rcx+208]
          vaesenc xmm0, xmm0, xmm2
          vmovdqu xmm1, OWORD PTR [rcx+224]
  L_AES_GCM_encrypt_block_avx2_aesenc_block_last:
          vaesenclast xmm0, xmm0, xmm1
          ; XOR the keystream with the input block and store the result.
          vmovdqu xmm1, OWORD PTR [r11]
          vpxor xmm0, xmm0, xmm1
          vmovdqu OWORD PTR [r10], xmm0
          ; Write the advanced counter back for the next call.
          vmovdqu OWORD PTR [rax], xmm3
          vzeroupper
          ret
  AES_GCM_encrypt_block_avx2 ENDP
  14181. _text ENDS
  14182. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_ghash_block_avx2
  ; ABI:  Microsoft x64 (MASM syntax).
  ; In:   rcx = pointer to one 16-byte data block
  ;       rdx = pointer to the 16-byte running GHASH value (read and updated)
  ;       r8  = pointer to the 16-byte hash key used as the multiplicand
  ; Out:  [rdx] = ([rdx] XOR byte-reversed data) multiplied by [r8] and
  ;       reduced with L_avx2_aes_gcm_mod2_128.
  ; Saved: xmm6 (callee-saved in this ABI), spilled to a 16-byte stack slot.
  ; ---------------------------------------------------------------------
  AES_GCM_ghash_block_avx2 PROC
          sub rsp, 16
          vmovdqu OWORD PTR [rsp], xmm6
          ; Gather operands: data block, hash key, running hash value.
          vmovdqu xmm0, OWORD PTR [rcx]
          vmovdqu xmm5, OWORD PTR [r8]
          vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
          vmovdqu xmm4, OWORD PTR [rdx]
          vpxor xmm4, xmm4, xmm0
          ; Carry-less multiply: four 64x64 partial products.
          vpclmulqdq xmm0, xmm5, xmm4, 0
          vpclmulqdq xmm3, xmm5, xmm4, 17
          vpclmulqdq xmm1, xmm5, xmm4, 1
          vpclmulqdq xmm2, xmm5, xmm4, 16
          ; Combine the two cross terms and split them across the halves.
          vpxor xmm2, xmm2, xmm1
          vpslldq xmm1, xmm2, 8
          vpsrldq xmm2, xmm2, 8
          vpxor xmm4, xmm3, xmm2
          vpxor xmm6, xmm0, xmm1
          ; Shift the 256-bit product (xmm4:xmm6) left by one bit, carrying
          ; the top bit of each dword into its neighbour.
          vpsrld xmm1, xmm4, 31
          vpsrld xmm0, xmm6, 31
          vpslld xmm4, xmm4, 1
          vpslld xmm6, xmm6, 1
          vpsrldq xmm2, xmm0, 12
          vpslldq xmm1, xmm1, 4
          vpslldq xmm0, xmm0, 4
          vpor xmm4, xmm4, xmm1
          vpor xmm6, xmm6, xmm0
          vpor xmm4, xmm4, xmm2
          ; Reduce the low half in two folding steps and merge it into the
          ; high half.
          vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
          vpshufd xmm1, xmm6, 78
          vpclmulqdq xmm0, xmm6, xmm2, 16
          vpxor xmm1, xmm1, xmm0
          vpclmulqdq xmm0, xmm1, xmm2, 16
          vpshufd xmm1, xmm1, 78
          vpxor xmm1, xmm1, xmm0
          vpxor xmm4, xmm4, xmm1
          ; Store the updated hash, then restore the caller's xmm6.
          vmovdqu OWORD PTR [rdx], xmm4
          vmovdqu xmm6, OWORD PTR [rsp]
          vzeroupper
          add rsp, 16
          ret
  AES_GCM_ghash_block_avx2 ENDP
  14227. _text ENDS
  14228. _text SEGMENT READONLY PARA
  14229. AES_GCM_encrypt_update_avx2 PROC
  14230. push r12
  14231. push r13
  14232. push r14
  14233. push r15
  14234. push rdi
  14235. mov rax, rcx
  14236. mov r10, r8
  14237. mov r8d, edx
  14238. mov r11, r9
  14239. mov r9d, DWORD PTR [rsp+80]
  14240. mov r12, QWORD PTR [rsp+88]
  14241. mov r13, QWORD PTR [rsp+96]
  14242. mov r14, QWORD PTR [rsp+104]
  14243. sub rsp, 312
  14244. vmovdqu OWORD PTR [rsp+152], xmm6
  14245. vmovdqu OWORD PTR [rsp+168], xmm7
  14246. vmovdqu OWORD PTR [rsp+184], xmm8
  14247. vmovdqu OWORD PTR [rsp+200], xmm9
  14248. vmovdqu OWORD PTR [rsp+216], xmm10
  14249. vmovdqu OWORD PTR [rsp+232], xmm11
  14250. vmovdqu OWORD PTR [rsp+248], xmm12
  14251. vmovdqu OWORD PTR [rsp+264], xmm13
  14252. vmovdqu OWORD PTR [rsp+280], xmm14
  14253. vmovdqu OWORD PTR [rsp+296], xmm15
  14254. vmovdqu xmm6, OWORD PTR [r12]
  14255. vmovdqu xmm5, OWORD PTR [r13]
  14256. vmovdqu xmm4, OWORD PTR [r14]
  14257. vpsrlq xmm1, xmm5, 63
  14258. vpsllq xmm0, xmm5, 1
  14259. vpslldq xmm1, xmm1, 8
  14260. vpor xmm0, xmm0, xmm1
  14261. vpshufd xmm5, xmm5, 255
  14262. vpsrad xmm5, xmm5, 31
  14263. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  14264. vpxor xmm5, xmm5, xmm0
  14265. xor edi, edi
  14266. cmp r9d, 128
  14267. mov r15d, r9d
  14268. jl L_AES_GCM_encrypt_update_avx2_done_128
  14269. and r15d, 4294967168
  14270. vmovdqu OWORD PTR [rsp+128], xmm4
  14271. vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
  14272. ; H ^ 1 and H ^ 2
  14273. vpclmulqdq xmm9, xmm5, xmm5, 0
  14274. vpclmulqdq xmm10, xmm5, xmm5, 17
  14275. vpclmulqdq xmm8, xmm9, xmm3, 16
  14276. vpshufd xmm9, xmm9, 78
  14277. vpxor xmm9, xmm9, xmm8
  14278. vpclmulqdq xmm8, xmm9, xmm3, 16
  14279. vpshufd xmm9, xmm9, 78
  14280. vpxor xmm9, xmm9, xmm8
  14281. vpxor xmm0, xmm10, xmm9
  14282. vmovdqu OWORD PTR [rsp], xmm5
  14283. vmovdqu OWORD PTR [rsp+16], xmm0
  14284. ; H ^ 3 and H ^ 4
  14285. vpclmulqdq xmm11, xmm0, xmm5, 16
  14286. vpclmulqdq xmm10, xmm0, xmm5, 1
  14287. vpclmulqdq xmm9, xmm0, xmm5, 0
  14288. vpclmulqdq xmm12, xmm0, xmm5, 17
  14289. vpclmulqdq xmm13, xmm0, xmm0, 0
  14290. vpclmulqdq xmm14, xmm0, xmm0, 17
  14291. vpxor xmm11, xmm11, xmm10
  14292. vpslldq xmm10, xmm11, 8
  14293. vpsrldq xmm11, xmm11, 8
  14294. vpxor xmm10, xmm10, xmm9
  14295. vpclmulqdq xmm8, xmm13, xmm3, 16
  14296. vpclmulqdq xmm9, xmm10, xmm3, 16
  14297. vpshufd xmm10, xmm10, 78
  14298. vpshufd xmm13, xmm13, 78
  14299. vpxor xmm10, xmm10, xmm9
  14300. vpxor xmm13, xmm13, xmm8
  14301. vpclmulqdq xmm9, xmm10, xmm3, 16
  14302. vpclmulqdq xmm8, xmm13, xmm3, 16
  14303. vpshufd xmm10, xmm10, 78
  14304. vpshufd xmm13, xmm13, 78
  14305. vpxor xmm12, xmm12, xmm11
  14306. vpxor xmm13, xmm13, xmm8
  14307. vpxor xmm10, xmm10, xmm12
  14308. vpxor xmm2, xmm13, xmm14
  14309. vpxor xmm1, xmm10, xmm9
  14310. vmovdqu OWORD PTR [rsp+32], xmm1
  14311. vmovdqu OWORD PTR [rsp+48], xmm2
  14312. ; H ^ 5 and H ^ 6
  14313. vpclmulqdq xmm11, xmm1, xmm0, 16
  14314. vpclmulqdq xmm10, xmm1, xmm0, 1
  14315. vpclmulqdq xmm9, xmm1, xmm0, 0
  14316. vpclmulqdq xmm12, xmm1, xmm0, 17
  14317. vpclmulqdq xmm13, xmm1, xmm1, 0
  14318. vpclmulqdq xmm14, xmm1, xmm1, 17
  14319. vpxor xmm11, xmm11, xmm10
  14320. vpslldq xmm10, xmm11, 8
  14321. vpsrldq xmm11, xmm11, 8
  14322. vpxor xmm10, xmm10, xmm9
  14323. vpclmulqdq xmm8, xmm13, xmm3, 16
  14324. vpclmulqdq xmm9, xmm10, xmm3, 16
  14325. vpshufd xmm10, xmm10, 78
  14326. vpshufd xmm13, xmm13, 78
  14327. vpxor xmm10, xmm10, xmm9
  14328. vpxor xmm13, xmm13, xmm8
  14329. vpclmulqdq xmm9, xmm10, xmm3, 16
  14330. vpclmulqdq xmm8, xmm13, xmm3, 16
  14331. vpshufd xmm10, xmm10, 78
  14332. vpshufd xmm13, xmm13, 78
  14333. vpxor xmm12, xmm12, xmm11
  14334. vpxor xmm13, xmm13, xmm8
  14335. vpxor xmm10, xmm10, xmm12
  14336. vpxor xmm0, xmm13, xmm14
  14337. vpxor xmm7, xmm10, xmm9
  14338. vmovdqu OWORD PTR [rsp+64], xmm7
  14339. vmovdqu OWORD PTR [rsp+80], xmm0
  14340. ; H ^ 7 and H ^ 8
  14341. vpclmulqdq xmm11, xmm2, xmm1, 16
  14342. vpclmulqdq xmm10, xmm2, xmm1, 1
  14343. vpclmulqdq xmm9, xmm2, xmm1, 0
  14344. vpclmulqdq xmm12, xmm2, xmm1, 17
  14345. vpclmulqdq xmm13, xmm2, xmm2, 0
  14346. vpclmulqdq xmm14, xmm2, xmm2, 17
  14347. vpxor xmm11, xmm11, xmm10
  14348. vpslldq xmm10, xmm11, 8
  14349. vpsrldq xmm11, xmm11, 8
  14350. vpxor xmm10, xmm10, xmm9
  14351. vpclmulqdq xmm8, xmm13, xmm3, 16
  14352. vpclmulqdq xmm9, xmm10, xmm3, 16
  14353. vpshufd xmm10, xmm10, 78
  14354. vpshufd xmm13, xmm13, 78
  14355. vpxor xmm10, xmm10, xmm9
  14356. vpxor xmm13, xmm13, xmm8
  14357. vpclmulqdq xmm9, xmm10, xmm3, 16
  14358. vpclmulqdq xmm8, xmm13, xmm3, 16
  14359. vpshufd xmm10, xmm10, 78
  14360. vpshufd xmm13, xmm13, 78
  14361. vpxor xmm12, xmm12, xmm11
  14362. vpxor xmm13, xmm13, xmm8
  14363. vpxor xmm10, xmm10, xmm12
  14364. vpxor xmm0, xmm13, xmm14
  14365. vpxor xmm7, xmm10, xmm9
  14366. vmovdqu OWORD PTR [rsp+96], xmm7
  14367. vmovdqu OWORD PTR [rsp+112], xmm0
  14368. ; First 128 bytes of input
  14369. ; aesenc_128
  14370. ; aesenc_ctr
  14371. vmovdqu xmm0, OWORD PTR [rsp+128]
  14372. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  14373. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  14374. vpshufb xmm8, xmm0, xmm1
  14375. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  14376. vpshufb xmm9, xmm9, xmm1
  14377. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  14378. vpshufb xmm10, xmm10, xmm1
  14379. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  14380. vpshufb xmm11, xmm11, xmm1
  14381. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  14382. vpshufb xmm12, xmm12, xmm1
  14383. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  14384. vpshufb xmm13, xmm13, xmm1
  14385. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  14386. vpshufb xmm14, xmm14, xmm1
  14387. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  14388. vpshufb xmm15, xmm15, xmm1
  14389. ; aesenc_xor
  14390. vmovdqu xmm7, OWORD PTR [rax]
  14391. vmovdqu OWORD PTR [rsp+128], xmm0
  14392. vpxor xmm8, xmm8, xmm7
  14393. vpxor xmm9, xmm9, xmm7
  14394. vpxor xmm10, xmm10, xmm7
  14395. vpxor xmm11, xmm11, xmm7
  14396. vpxor xmm12, xmm12, xmm7
  14397. vpxor xmm13, xmm13, xmm7
  14398. vpxor xmm14, xmm14, xmm7
  14399. vpxor xmm15, xmm15, xmm7
  14400. vmovdqu xmm7, OWORD PTR [rax+16]
  14401. vaesenc xmm8, xmm8, xmm7
  14402. vaesenc xmm9, xmm9, xmm7
  14403. vaesenc xmm10, xmm10, xmm7
  14404. vaesenc xmm11, xmm11, xmm7
  14405. vaesenc xmm12, xmm12, xmm7
  14406. vaesenc xmm13, xmm13, xmm7
  14407. vaesenc xmm14, xmm14, xmm7
  14408. vaesenc xmm15, xmm15, xmm7
  14409. vmovdqu xmm7, OWORD PTR [rax+32]
  14410. vaesenc xmm8, xmm8, xmm7
  14411. vaesenc xmm9, xmm9, xmm7
  14412. vaesenc xmm10, xmm10, xmm7
  14413. vaesenc xmm11, xmm11, xmm7
  14414. vaesenc xmm12, xmm12, xmm7
  14415. vaesenc xmm13, xmm13, xmm7
  14416. vaesenc xmm14, xmm14, xmm7
  14417. vaesenc xmm15, xmm15, xmm7
  14418. vmovdqu xmm7, OWORD PTR [rax+48]
  14419. vaesenc xmm8, xmm8, xmm7
  14420. vaesenc xmm9, xmm9, xmm7
  14421. vaesenc xmm10, xmm10, xmm7
  14422. vaesenc xmm11, xmm11, xmm7
  14423. vaesenc xmm12, xmm12, xmm7
  14424. vaesenc xmm13, xmm13, xmm7
  14425. vaesenc xmm14, xmm14, xmm7
  14426. vaesenc xmm15, xmm15, xmm7
  14427. vmovdqu xmm7, OWORD PTR [rax+64]
  14428. vaesenc xmm8, xmm8, xmm7
  14429. vaesenc xmm9, xmm9, xmm7
  14430. vaesenc xmm10, xmm10, xmm7
  14431. vaesenc xmm11, xmm11, xmm7
  14432. vaesenc xmm12, xmm12, xmm7
  14433. vaesenc xmm13, xmm13, xmm7
  14434. vaesenc xmm14, xmm14, xmm7
  14435. vaesenc xmm15, xmm15, xmm7
  14436. vmovdqu xmm7, OWORD PTR [rax+80]
  14437. vaesenc xmm8, xmm8, xmm7
  14438. vaesenc xmm9, xmm9, xmm7
  14439. vaesenc xmm10, xmm10, xmm7
  14440. vaesenc xmm11, xmm11, xmm7
  14441. vaesenc xmm12, xmm12, xmm7
  14442. vaesenc xmm13, xmm13, xmm7
  14443. vaesenc xmm14, xmm14, xmm7
  14444. vaesenc xmm15, xmm15, xmm7
  14445. vmovdqu xmm7, OWORD PTR [rax+96]
  14446. vaesenc xmm8, xmm8, xmm7
  14447. vaesenc xmm9, xmm9, xmm7
  14448. vaesenc xmm10, xmm10, xmm7
  14449. vaesenc xmm11, xmm11, xmm7
  14450. vaesenc xmm12, xmm12, xmm7
  14451. vaesenc xmm13, xmm13, xmm7
  14452. vaesenc xmm14, xmm14, xmm7
  14453. vaesenc xmm15, xmm15, xmm7
  14454. vmovdqu xmm7, OWORD PTR [rax+112]
  14455. vaesenc xmm8, xmm8, xmm7
  14456. vaesenc xmm9, xmm9, xmm7
  14457. vaesenc xmm10, xmm10, xmm7
  14458. vaesenc xmm11, xmm11, xmm7
  14459. vaesenc xmm12, xmm12, xmm7
  14460. vaesenc xmm13, xmm13, xmm7
  14461. vaesenc xmm14, xmm14, xmm7
  14462. vaesenc xmm15, xmm15, xmm7
  14463. vmovdqu xmm7, OWORD PTR [rax+128]
  14464. vaesenc xmm8, xmm8, xmm7
  14465. vaesenc xmm9, xmm9, xmm7
  14466. vaesenc xmm10, xmm10, xmm7
  14467. vaesenc xmm11, xmm11, xmm7
  14468. vaesenc xmm12, xmm12, xmm7
  14469. vaesenc xmm13, xmm13, xmm7
  14470. vaesenc xmm14, xmm14, xmm7
  14471. vaesenc xmm15, xmm15, xmm7
  14472. vmovdqu xmm7, OWORD PTR [rax+144]
  14473. vaesenc xmm8, xmm8, xmm7
  14474. vaesenc xmm9, xmm9, xmm7
  14475. vaesenc xmm10, xmm10, xmm7
  14476. vaesenc xmm11, xmm11, xmm7
  14477. vaesenc xmm12, xmm12, xmm7
  14478. vaesenc xmm13, xmm13, xmm7
  14479. vaesenc xmm14, xmm14, xmm7
  14480. vaesenc xmm15, xmm15, xmm7
  14481. cmp r8d, 11
  14482. vmovdqu xmm7, OWORD PTR [rax+160]
  14483. jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done
  14484. vaesenc xmm8, xmm8, xmm7
  14485. vaesenc xmm9, xmm9, xmm7
  14486. vaesenc xmm10, xmm10, xmm7
  14487. vaesenc xmm11, xmm11, xmm7
  14488. vaesenc xmm12, xmm12, xmm7
  14489. vaesenc xmm13, xmm13, xmm7
  14490. vaesenc xmm14, xmm14, xmm7
  14491. vaesenc xmm15, xmm15, xmm7
  14492. vmovdqu xmm7, OWORD PTR [rax+176]
  14493. vaesenc xmm8, xmm8, xmm7
  14494. vaesenc xmm9, xmm9, xmm7
  14495. vaesenc xmm10, xmm10, xmm7
  14496. vaesenc xmm11, xmm11, xmm7
  14497. vaesenc xmm12, xmm12, xmm7
  14498. vaesenc xmm13, xmm13, xmm7
  14499. vaesenc xmm14, xmm14, xmm7
  14500. vaesenc xmm15, xmm15, xmm7
  14501. cmp r8d, 13
  14502. vmovdqu xmm7, OWORD PTR [rax+192]
  14503. jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done
  14504. vaesenc xmm8, xmm8, xmm7
  14505. vaesenc xmm9, xmm9, xmm7
  14506. vaesenc xmm10, xmm10, xmm7
  14507. vaesenc xmm11, xmm11, xmm7
  14508. vaesenc xmm12, xmm12, xmm7
  14509. vaesenc xmm13, xmm13, xmm7
  14510. vaesenc xmm14, xmm14, xmm7
  14511. vaesenc xmm15, xmm15, xmm7
  14512. vmovdqu xmm7, OWORD PTR [rax+208]
  14513. vaesenc xmm8, xmm8, xmm7
  14514. vaesenc xmm9, xmm9, xmm7
  14515. vaesenc xmm10, xmm10, xmm7
  14516. vaesenc xmm11, xmm11, xmm7
  14517. vaesenc xmm12, xmm12, xmm7
  14518. vaesenc xmm13, xmm13, xmm7
  14519. vaesenc xmm14, xmm14, xmm7
  14520. vaesenc xmm15, xmm15, xmm7
  14521. vmovdqu xmm7, OWORD PTR [rax+224]
  14522. L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done:
  14523. ; aesenc_last
  14524. vaesenclast xmm8, xmm8, xmm7
  14525. vaesenclast xmm9, xmm9, xmm7
  14526. vaesenclast xmm10, xmm10, xmm7
  14527. vaesenclast xmm11, xmm11, xmm7
  14528. vmovdqu xmm0, OWORD PTR [r11]
  14529. vmovdqu xmm1, OWORD PTR [r11+16]
  14530. vmovdqu xmm2, OWORD PTR [r11+32]
  14531. vmovdqu xmm3, OWORD PTR [r11+48]
  14532. vpxor xmm8, xmm8, xmm0
  14533. vpxor xmm9, xmm9, xmm1
  14534. vpxor xmm10, xmm10, xmm2
  14535. vpxor xmm11, xmm11, xmm3
  14536. vmovdqu OWORD PTR [r10], xmm8
  14537. vmovdqu OWORD PTR [r10+16], xmm9
  14538. vmovdqu OWORD PTR [r10+32], xmm10
  14539. vmovdqu OWORD PTR [r10+48], xmm11
  14540. vaesenclast xmm12, xmm12, xmm7
  14541. vaesenclast xmm13, xmm13, xmm7
  14542. vaesenclast xmm14, xmm14, xmm7
  14543. vaesenclast xmm15, xmm15, xmm7
  14544. vmovdqu xmm0, OWORD PTR [r11+64]
  14545. vmovdqu xmm1, OWORD PTR [r11+80]
  14546. vmovdqu xmm2, OWORD PTR [r11+96]
  14547. vmovdqu xmm3, OWORD PTR [r11+112]
  14548. vpxor xmm12, xmm12, xmm0
  14549. vpxor xmm13, xmm13, xmm1
  14550. vpxor xmm14, xmm14, xmm2
  14551. vpxor xmm15, xmm15, xmm3
  14552. vmovdqu OWORD PTR [r10+64], xmm12
  14553. vmovdqu OWORD PTR [r10+80], xmm13
  14554. vmovdqu OWORD PTR [r10+96], xmm14
  14555. vmovdqu OWORD PTR [r10+112], xmm15
  14556. cmp r15d, 128
  14557. mov edi, 128
  14558. jle L_AES_GCM_encrypt_update_avx2_end_128
  14559. ; More 128 bytes of input
  14560. L_AES_GCM_encrypt_update_avx2_ghash_128:
  14561. ; aesenc_128_ghash
  14562. lea rcx, QWORD PTR [r11+rdi]
  14563. lea rdx, QWORD PTR [r10+rdi]
  14564. ; aesenc_ctr
  14565. vmovdqu xmm0, OWORD PTR [rsp+128]
  14566. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  14567. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  14568. vpshufb xmm8, xmm0, xmm1
  14569. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  14570. vpshufb xmm9, xmm9, xmm1
  14571. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  14572. vpshufb xmm10, xmm10, xmm1
  14573. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  14574. vpshufb xmm11, xmm11, xmm1
  14575. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  14576. vpshufb xmm12, xmm12, xmm1
  14577. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  14578. vpshufb xmm13, xmm13, xmm1
  14579. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  14580. vpshufb xmm14, xmm14, xmm1
  14581. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  14582. vpshufb xmm15, xmm15, xmm1
  14583. ; aesenc_xor
  14584. vmovdqu xmm7, OWORD PTR [rax]
  14585. vmovdqu OWORD PTR [rsp+128], xmm0
  14586. vpxor xmm8, xmm8, xmm7
  14587. vpxor xmm9, xmm9, xmm7
  14588. vpxor xmm10, xmm10, xmm7
  14589. vpxor xmm11, xmm11, xmm7
  14590. vpxor xmm12, xmm12, xmm7
  14591. vpxor xmm13, xmm13, xmm7
  14592. vpxor xmm14, xmm14, xmm7
  14593. vpxor xmm15, xmm15, xmm7
  14594. ; aesenc_pclmul_1
  14595. vmovdqu xmm1, OWORD PTR [rdx+-128]
  14596. vmovdqu xmm0, OWORD PTR [rax+16]
  14597. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14598. vmovdqu xmm2, OWORD PTR [rsp+112]
  14599. vpxor xmm1, xmm1, xmm6
  14600. vpclmulqdq xmm5, xmm1, xmm2, 16
  14601. vpclmulqdq xmm3, xmm1, xmm2, 1
  14602. vpclmulqdq xmm6, xmm1, xmm2, 0
  14603. vpclmulqdq xmm7, xmm1, xmm2, 17
  14604. vaesenc xmm8, xmm8, xmm0
  14605. vaesenc xmm9, xmm9, xmm0
  14606. vaesenc xmm10, xmm10, xmm0
  14607. vaesenc xmm11, xmm11, xmm0
  14608. vaesenc xmm12, xmm12, xmm0
  14609. vaesenc xmm13, xmm13, xmm0
  14610. vaesenc xmm14, xmm14, xmm0
  14611. vaesenc xmm15, xmm15, xmm0
  14612. ; aesenc_pclmul_2
  14613. vmovdqu xmm1, OWORD PTR [rdx+-112]
  14614. vmovdqu xmm0, OWORD PTR [rsp+96]
  14615. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14616. vpxor xmm5, xmm5, xmm3
  14617. vpclmulqdq xmm2, xmm1, xmm0, 16
  14618. vpclmulqdq xmm3, xmm1, xmm0, 1
  14619. vpclmulqdq xmm4, xmm1, xmm0, 0
  14620. vpclmulqdq xmm1, xmm1, xmm0, 17
  14621. vmovdqu xmm0, OWORD PTR [rax+32]
  14622. vpxor xmm7, xmm7, xmm1
  14623. vaesenc xmm8, xmm8, xmm0
  14624. vaesenc xmm9, xmm9, xmm0
  14625. vaesenc xmm10, xmm10, xmm0
  14626. vaesenc xmm11, xmm11, xmm0
  14627. vaesenc xmm12, xmm12, xmm0
  14628. vaesenc xmm13, xmm13, xmm0
  14629. vaesenc xmm14, xmm14, xmm0
  14630. vaesenc xmm15, xmm15, xmm0
  14631. ; aesenc_pclmul_n
  14632. vmovdqu xmm1, OWORD PTR [rdx+-96]
  14633. vmovdqu xmm0, OWORD PTR [rsp+80]
  14634. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14635. vpxor xmm5, xmm5, xmm2
  14636. vpclmulqdq xmm2, xmm1, xmm0, 16
  14637. vpxor xmm5, xmm5, xmm3
  14638. vpclmulqdq xmm3, xmm1, xmm0, 1
  14639. vpxor xmm6, xmm6, xmm4
  14640. vpclmulqdq xmm4, xmm1, xmm0, 0
  14641. vpclmulqdq xmm1, xmm1, xmm0, 17
  14642. vmovdqu xmm0, OWORD PTR [rax+48]
  14643. vpxor xmm7, xmm7, xmm1
  14644. vaesenc xmm8, xmm8, xmm0
  14645. vaesenc xmm9, xmm9, xmm0
  14646. vaesenc xmm10, xmm10, xmm0
  14647. vaesenc xmm11, xmm11, xmm0
  14648. vaesenc xmm12, xmm12, xmm0
  14649. vaesenc xmm13, xmm13, xmm0
  14650. vaesenc xmm14, xmm14, xmm0
  14651. vaesenc xmm15, xmm15, xmm0
  14652. ; aesenc_pclmul_n
  14653. vmovdqu xmm1, OWORD PTR [rdx+-80]
  14654. vmovdqu xmm0, OWORD PTR [rsp+64]
  14655. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14656. vpxor xmm5, xmm5, xmm2
  14657. vpclmulqdq xmm2, xmm1, xmm0, 16
  14658. vpxor xmm5, xmm5, xmm3
  14659. vpclmulqdq xmm3, xmm1, xmm0, 1
  14660. vpxor xmm6, xmm6, xmm4
  14661. vpclmulqdq xmm4, xmm1, xmm0, 0
  14662. vpclmulqdq xmm1, xmm1, xmm0, 17
  14663. vmovdqu xmm0, OWORD PTR [rax+64]
  14664. vpxor xmm7, xmm7, xmm1
  14665. vaesenc xmm8, xmm8, xmm0
  14666. vaesenc xmm9, xmm9, xmm0
  14667. vaesenc xmm10, xmm10, xmm0
  14668. vaesenc xmm11, xmm11, xmm0
  14669. vaesenc xmm12, xmm12, xmm0
  14670. vaesenc xmm13, xmm13, xmm0
  14671. vaesenc xmm14, xmm14, xmm0
  14672. vaesenc xmm15, xmm15, xmm0
  14673. ; aesenc_pclmul_n
  14674. vmovdqu xmm1, OWORD PTR [rdx+-64]
  14675. vmovdqu xmm0, OWORD PTR [rsp+48]
  14676. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14677. vpxor xmm5, xmm5, xmm2
  14678. vpclmulqdq xmm2, xmm1, xmm0, 16
  14679. vpxor xmm5, xmm5, xmm3
  14680. vpclmulqdq xmm3, xmm1, xmm0, 1
  14681. vpxor xmm6, xmm6, xmm4
  14682. vpclmulqdq xmm4, xmm1, xmm0, 0
  14683. vpclmulqdq xmm1, xmm1, xmm0, 17
  14684. vmovdqu xmm0, OWORD PTR [rax+80]
  14685. vpxor xmm7, xmm7, xmm1
  14686. vaesenc xmm8, xmm8, xmm0
  14687. vaesenc xmm9, xmm9, xmm0
  14688. vaesenc xmm10, xmm10, xmm0
  14689. vaesenc xmm11, xmm11, xmm0
  14690. vaesenc xmm12, xmm12, xmm0
  14691. vaesenc xmm13, xmm13, xmm0
  14692. vaesenc xmm14, xmm14, xmm0
  14693. vaesenc xmm15, xmm15, xmm0
  14694. ; aesenc_pclmul_n
  14695. vmovdqu xmm1, OWORD PTR [rdx+-48]
  14696. vmovdqu xmm0, OWORD PTR [rsp+32]
  14697. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14698. vpxor xmm5, xmm5, xmm2
  14699. vpclmulqdq xmm2, xmm1, xmm0, 16
  14700. vpxor xmm5, xmm5, xmm3
  14701. vpclmulqdq xmm3, xmm1, xmm0, 1
  14702. vpxor xmm6, xmm6, xmm4
  14703. vpclmulqdq xmm4, xmm1, xmm0, 0
  14704. vpclmulqdq xmm1, xmm1, xmm0, 17
  14705. vmovdqu xmm0, OWORD PTR [rax+96]
  14706. vpxor xmm7, xmm7, xmm1
  14707. vaesenc xmm8, xmm8, xmm0
  14708. vaesenc xmm9, xmm9, xmm0
  14709. vaesenc xmm10, xmm10, xmm0
  14710. vaesenc xmm11, xmm11, xmm0
  14711. vaesenc xmm12, xmm12, xmm0
  14712. vaesenc xmm13, xmm13, xmm0
  14713. vaesenc xmm14, xmm14, xmm0
  14714. vaesenc xmm15, xmm15, xmm0
  14715. ; aesenc_pclmul_n
  14716. vmovdqu xmm1, OWORD PTR [rdx+-32]
  14717. vmovdqu xmm0, OWORD PTR [rsp+16]
  14718. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14719. vpxor xmm5, xmm5, xmm2
  14720. vpclmulqdq xmm2, xmm1, xmm0, 16
  14721. vpxor xmm5, xmm5, xmm3
  14722. vpclmulqdq xmm3, xmm1, xmm0, 1
  14723. vpxor xmm6, xmm6, xmm4
  14724. vpclmulqdq xmm4, xmm1, xmm0, 0
  14725. vpclmulqdq xmm1, xmm1, xmm0, 17
  14726. vmovdqu xmm0, OWORD PTR [rax+112]
  14727. vpxor xmm7, xmm7, xmm1
  14728. vaesenc xmm8, xmm8, xmm0
  14729. vaesenc xmm9, xmm9, xmm0
  14730. vaesenc xmm10, xmm10, xmm0
  14731. vaesenc xmm11, xmm11, xmm0
  14732. vaesenc xmm12, xmm12, xmm0
  14733. vaesenc xmm13, xmm13, xmm0
  14734. vaesenc xmm14, xmm14, xmm0
  14735. vaesenc xmm15, xmm15, xmm0
  14736. ; aesenc_pclmul_n
  14737. vmovdqu xmm1, OWORD PTR [rdx+-16]
  14738. vmovdqu xmm0, OWORD PTR [rsp]
  14739. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14740. vpxor xmm5, xmm5, xmm2
  14741. vpclmulqdq xmm2, xmm1, xmm0, 16
  14742. vpxor xmm5, xmm5, xmm3
  14743. vpclmulqdq xmm3, xmm1, xmm0, 1
  14744. vpxor xmm6, xmm6, xmm4
  14745. vpclmulqdq xmm4, xmm1, xmm0, 0
  14746. vpclmulqdq xmm1, xmm1, xmm0, 17
  14747. vmovdqu xmm0, OWORD PTR [rax+128]
  14748. vpxor xmm7, xmm7, xmm1
  14749. vaesenc xmm8, xmm8, xmm0
  14750. vaesenc xmm9, xmm9, xmm0
  14751. vaesenc xmm10, xmm10, xmm0
  14752. vaesenc xmm11, xmm11, xmm0
  14753. vaesenc xmm12, xmm12, xmm0
  14754. vaesenc xmm13, xmm13, xmm0
  14755. vaesenc xmm14, xmm14, xmm0
  14756. vaesenc xmm15, xmm15, xmm0
  14757. ; aesenc_pclmul_l
  14758. vpxor xmm5, xmm5, xmm2
  14759. vpxor xmm6, xmm6, xmm4
  14760. vpxor xmm5, xmm5, xmm3
  14761. vpslldq xmm1, xmm5, 8
  14762. vpsrldq xmm5, xmm5, 8
  14763. vmovdqu xmm4, OWORD PTR [rax+144]
  14764. vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
  14765. vaesenc xmm8, xmm8, xmm4
  14766. vpxor xmm6, xmm6, xmm1
  14767. vpxor xmm7, xmm7, xmm5
  14768. vpclmulqdq xmm3, xmm6, xmm0, 16
  14769. vaesenc xmm9, xmm9, xmm4
  14770. vaesenc xmm10, xmm10, xmm4
  14771. vaesenc xmm11, xmm11, xmm4
  14772. vpshufd xmm6, xmm6, 78
  14773. vpxor xmm6, xmm6, xmm3
  14774. vpclmulqdq xmm3, xmm6, xmm0, 16
  14775. vaesenc xmm12, xmm12, xmm4
  14776. vaesenc xmm13, xmm13, xmm4
  14777. vaesenc xmm14, xmm14, xmm4
  14778. vpshufd xmm6, xmm6, 78
  14779. vpxor xmm6, xmm6, xmm3
  14780. vpxor xmm6, xmm6, xmm7
  14781. vaesenc xmm15, xmm15, xmm4
  14782. cmp r8d, 11
  14783. vmovdqu xmm7, OWORD PTR [rax+160]
  14784. jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done
  14785. vaesenc xmm8, xmm8, xmm7
  14786. vaesenc xmm9, xmm9, xmm7
  14787. vaesenc xmm10, xmm10, xmm7
  14788. vaesenc xmm11, xmm11, xmm7
  14789. vaesenc xmm12, xmm12, xmm7
  14790. vaesenc xmm13, xmm13, xmm7
  14791. vaesenc xmm14, xmm14, xmm7
  14792. vaesenc xmm15, xmm15, xmm7
  14793. vmovdqu xmm7, OWORD PTR [rax+176]
  14794. vaesenc xmm8, xmm8, xmm7
  14795. vaesenc xmm9, xmm9, xmm7
  14796. vaesenc xmm10, xmm10, xmm7
  14797. vaesenc xmm11, xmm11, xmm7
  14798. vaesenc xmm12, xmm12, xmm7
  14799. vaesenc xmm13, xmm13, xmm7
  14800. vaesenc xmm14, xmm14, xmm7
  14801. vaesenc xmm15, xmm15, xmm7
  14802. cmp r8d, 13
  14803. vmovdqu xmm7, OWORD PTR [rax+192]
  14804. jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done
  14805. vaesenc xmm8, xmm8, xmm7
  14806. vaesenc xmm9, xmm9, xmm7
  14807. vaesenc xmm10, xmm10, xmm7
  14808. vaesenc xmm11, xmm11, xmm7
  14809. vaesenc xmm12, xmm12, xmm7
  14810. vaesenc xmm13, xmm13, xmm7
  14811. vaesenc xmm14, xmm14, xmm7
  14812. vaesenc xmm15, xmm15, xmm7
  14813. vmovdqu xmm7, OWORD PTR [rax+208]
  14814. vaesenc xmm8, xmm8, xmm7
  14815. vaesenc xmm9, xmm9, xmm7
  14816. vaesenc xmm10, xmm10, xmm7
  14817. vaesenc xmm11, xmm11, xmm7
  14818. vaesenc xmm12, xmm12, xmm7
  14819. vaesenc xmm13, xmm13, xmm7
  14820. vaesenc xmm14, xmm14, xmm7
  14821. vaesenc xmm15, xmm15, xmm7
  14822. vmovdqu xmm7, OWORD PTR [rax+224]
  14823. L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done:
  14824. ; aesenc_last
  14825. vaesenclast xmm8, xmm8, xmm7
  14826. vaesenclast xmm9, xmm9, xmm7
  14827. vaesenclast xmm10, xmm10, xmm7
  14828. vaesenclast xmm11, xmm11, xmm7
  14829. vmovdqu xmm0, OWORD PTR [rcx]
  14830. vmovdqu xmm1, OWORD PTR [rcx+16]
  14831. vmovdqu xmm2, OWORD PTR [rcx+32]
  14832. vmovdqu xmm3, OWORD PTR [rcx+48]
  14833. vpxor xmm8, xmm8, xmm0
  14834. vpxor xmm9, xmm9, xmm1
  14835. vpxor xmm10, xmm10, xmm2
  14836. vpxor xmm11, xmm11, xmm3
  14837. vmovdqu OWORD PTR [rdx], xmm8
  14838. vmovdqu OWORD PTR [rdx+16], xmm9
  14839. vmovdqu OWORD PTR [rdx+32], xmm10
  14840. vmovdqu OWORD PTR [rdx+48], xmm11
  14841. vaesenclast xmm12, xmm12, xmm7
  14842. vaesenclast xmm13, xmm13, xmm7
  14843. vaesenclast xmm14, xmm14, xmm7
  14844. vaesenclast xmm15, xmm15, xmm7
  14845. vmovdqu xmm0, OWORD PTR [rcx+64]
  14846. vmovdqu xmm1, OWORD PTR [rcx+80]
  14847. vmovdqu xmm2, OWORD PTR [rcx+96]
  14848. vmovdqu xmm3, OWORD PTR [rcx+112]
  14849. vpxor xmm12, xmm12, xmm0
  14850. vpxor xmm13, xmm13, xmm1
  14851. vpxor xmm14, xmm14, xmm2
  14852. vpxor xmm15, xmm15, xmm3
  14853. vmovdqu OWORD PTR [rdx+64], xmm12
  14854. vmovdqu OWORD PTR [rdx+80], xmm13
  14855. vmovdqu OWORD PTR [rdx+96], xmm14
  14856. vmovdqu OWORD PTR [rdx+112], xmm15
  14857. ; aesenc_128_ghash - end
  14858. add edi, 128
  14859. cmp edi, r15d
  14860. jl L_AES_GCM_encrypt_update_avx2_ghash_128
  14861. L_AES_GCM_encrypt_update_avx2_end_128:
  14862. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  14863. vpshufb xmm8, xmm8, xmm4
  14864. vpshufb xmm9, xmm9, xmm4
  14865. vpshufb xmm10, xmm10, xmm4
  14866. vpshufb xmm11, xmm11, xmm4
  14867. vpshufb xmm12, xmm12, xmm4
  14868. vpshufb xmm13, xmm13, xmm4
  14869. vpshufb xmm14, xmm14, xmm4
  14870. vpshufb xmm15, xmm15, xmm4
  14871. vpxor xmm8, xmm8, xmm6
  14872. vmovdqu xmm7, OWORD PTR [rsp]
  14873. vpclmulqdq xmm5, xmm7, xmm15, 16
  14874. vpclmulqdq xmm1, xmm7, xmm15, 1
  14875. vpclmulqdq xmm4, xmm7, xmm15, 0
  14876. vpclmulqdq xmm6, xmm7, xmm15, 17
  14877. vpxor xmm5, xmm5, xmm1
  14878. vmovdqu xmm7, OWORD PTR [rsp+16]
  14879. vpclmulqdq xmm2, xmm7, xmm14, 16
  14880. vpclmulqdq xmm1, xmm7, xmm14, 1
  14881. vpclmulqdq xmm0, xmm7, xmm14, 0
  14882. vpclmulqdq xmm3, xmm7, xmm14, 17
  14883. vpxor xmm2, xmm2, xmm1
  14884. vpxor xmm6, xmm6, xmm3
  14885. vpxor xmm5, xmm5, xmm2
  14886. vpxor xmm4, xmm4, xmm0
  14887. vmovdqu xmm15, OWORD PTR [rsp+32]
  14888. vmovdqu xmm7, OWORD PTR [rsp+48]
  14889. vpclmulqdq xmm2, xmm15, xmm13, 16
  14890. vpclmulqdq xmm1, xmm15, xmm13, 1
  14891. vpclmulqdq xmm0, xmm15, xmm13, 0
  14892. vpclmulqdq xmm3, xmm15, xmm13, 17
  14893. vpxor xmm2, xmm2, xmm1
  14894. vpxor xmm6, xmm6, xmm3
  14895. vpxor xmm5, xmm5, xmm2
  14896. vpxor xmm4, xmm4, xmm0
  14897. vpclmulqdq xmm2, xmm7, xmm12, 16
  14898. vpclmulqdq xmm1, xmm7, xmm12, 1
  14899. vpclmulqdq xmm0, xmm7, xmm12, 0
  14900. vpclmulqdq xmm3, xmm7, xmm12, 17
  14901. vpxor xmm2, xmm2, xmm1
  14902. vpxor xmm6, xmm6, xmm3
  14903. vpxor xmm5, xmm5, xmm2
  14904. vpxor xmm4, xmm4, xmm0
  14905. vmovdqu xmm15, OWORD PTR [rsp+64]
  14906. vmovdqu xmm7, OWORD PTR [rsp+80]
  14907. vpclmulqdq xmm2, xmm15, xmm11, 16
  14908. vpclmulqdq xmm1, xmm15, xmm11, 1
  14909. vpclmulqdq xmm0, xmm15, xmm11, 0
  14910. vpclmulqdq xmm3, xmm15, xmm11, 17
  14911. vpxor xmm2, xmm2, xmm1
  14912. vpxor xmm6, xmm6, xmm3
  14913. vpxor xmm5, xmm5, xmm2
  14914. vpxor xmm4, xmm4, xmm0
  14915. vpclmulqdq xmm2, xmm7, xmm10, 16
  14916. vpclmulqdq xmm1, xmm7, xmm10, 1
  14917. vpclmulqdq xmm0, xmm7, xmm10, 0
  14918. vpclmulqdq xmm3, xmm7, xmm10, 17
  14919. vpxor xmm2, xmm2, xmm1
  14920. vpxor xmm6, xmm6, xmm3
  14921. vpxor xmm5, xmm5, xmm2
  14922. vpxor xmm4, xmm4, xmm0
  14923. vmovdqu xmm15, OWORD PTR [rsp+96]
  14924. vmovdqu xmm7, OWORD PTR [rsp+112]
  14925. vpclmulqdq xmm2, xmm15, xmm9, 16
  14926. vpclmulqdq xmm1, xmm15, xmm9, 1
  14927. vpclmulqdq xmm0, xmm15, xmm9, 0
  14928. vpclmulqdq xmm3, xmm15, xmm9, 17
  14929. vpxor xmm2, xmm2, xmm1
  14930. vpxor xmm6, xmm6, xmm3
  14931. vpxor xmm5, xmm5, xmm2
  14932. vpxor xmm4, xmm4, xmm0
  14933. vpclmulqdq xmm2, xmm7, xmm8, 16
  14934. vpclmulqdq xmm1, xmm7, xmm8, 1
  14935. vpclmulqdq xmm0, xmm7, xmm8, 0
  14936. vpclmulqdq xmm3, xmm7, xmm8, 17
  14937. vpxor xmm2, xmm2, xmm1
  14938. vpxor xmm6, xmm6, xmm3
  14939. vpxor xmm5, xmm5, xmm2
  14940. vpxor xmm4, xmm4, xmm0
  14941. vpslldq xmm7, xmm5, 8
  14942. vpsrldq xmm5, xmm5, 8
  14943. vpxor xmm4, xmm4, xmm7
  14944. vpxor xmm6, xmm6, xmm5
  14945. ; ghash_red
  14946. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  14947. vpclmulqdq xmm0, xmm4, xmm2, 16
  14948. vpshufd xmm1, xmm4, 78
  14949. vpxor xmm1, xmm1, xmm0
  14950. vpclmulqdq xmm0, xmm1, xmm2, 16
  14951. vpshufd xmm1, xmm1, 78
  14952. vpxor xmm1, xmm1, xmm0
  14953. vpxor xmm6, xmm6, xmm1
  14954. vmovdqu xmm5, OWORD PTR [rsp]
  14955. vmovdqu xmm4, OWORD PTR [rsp+128]
  14956. L_AES_GCM_encrypt_update_avx2_done_128:
  14957. cmp edi, r9d
  14958. je L_AES_GCM_encrypt_update_avx2_done_enc
  14959. mov r15d, r9d
  14960. and r15d, 4294967280
  14961. cmp edi, r15d
  14962. jge L_AES_GCM_encrypt_update_avx2_last_block_done
  14963. ; aesenc_block
  14964. vmovdqu xmm1, xmm4
  14965. vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  14966. vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
  14967. vpxor xmm0, xmm0, [rax]
  14968. vmovdqu xmm2, OWORD PTR [rax+16]
  14969. vaesenc xmm0, xmm0, xmm2
  14970. vmovdqu xmm2, OWORD PTR [rax+32]
  14971. vaesenc xmm0, xmm0, xmm2
  14972. vmovdqu xmm2, OWORD PTR [rax+48]
  14973. vaesenc xmm0, xmm0, xmm2
  14974. vmovdqu xmm2, OWORD PTR [rax+64]
  14975. vaesenc xmm0, xmm0, xmm2
  14976. vmovdqu xmm2, OWORD PTR [rax+80]
  14977. vaesenc xmm0, xmm0, xmm2
  14978. vmovdqu xmm2, OWORD PTR [rax+96]
  14979. vaesenc xmm0, xmm0, xmm2
  14980. vmovdqu xmm2, OWORD PTR [rax+112]
  14981. vaesenc xmm0, xmm0, xmm2
  14982. vmovdqu xmm2, OWORD PTR [rax+128]
  14983. vaesenc xmm0, xmm0, xmm2
  14984. vmovdqu xmm2, OWORD PTR [rax+144]
  14985. vaesenc xmm0, xmm0, xmm2
  14986. vmovdqu xmm4, xmm1
  14987. cmp r8d, 11
  14988. vmovdqu xmm1, OWORD PTR [rax+160]
  14989. jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last
  14990. vaesenc xmm0, xmm0, xmm1
  14991. vmovdqu xmm2, OWORD PTR [rax+176]
  14992. vaesenc xmm0, xmm0, xmm2
  14993. cmp r8d, 13
  14994. vmovdqu xmm1, OWORD PTR [rax+192]
  14995. jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last
  14996. vaesenc xmm0, xmm0, xmm1
  14997. vmovdqu xmm2, OWORD PTR [rax+208]
  14998. vaesenc xmm0, xmm0, xmm2
  14999. vmovdqu xmm1, OWORD PTR [rax+224]
  15000. L_AES_GCM_encrypt_update_avx2_aesenc_block_last:
  15001. vaesenclast xmm0, xmm0, xmm1
  15002. vmovdqu xmm1, OWORD PTR [r11+rdi]
  15003. vpxor xmm0, xmm0, xmm1
  15004. vmovdqu OWORD PTR [r10+rdi], xmm0
  15005. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15006. vpxor xmm6, xmm6, xmm0
  15007. add edi, 16
  15008. cmp edi, r15d
  15009. jge L_AES_GCM_encrypt_update_avx2_last_block_ghash
  15010. L_AES_GCM_encrypt_update_avx2_last_block_start:
  15011. vmovdqu xmm12, OWORD PTR [r11+rdi]
  15012. vpshufb xmm11, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  15013. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  15014. ; aesenc_gfmul_sb
  15015. vpclmulqdq xmm2, xmm6, xmm5, 1
  15016. vpclmulqdq xmm3, xmm6, xmm5, 16
  15017. vpclmulqdq xmm1, xmm6, xmm5, 0
  15018. vpclmulqdq xmm8, xmm6, xmm5, 17
  15019. vpxor xmm11, xmm11, [rax]
  15020. vaesenc xmm11, xmm11, [rax+16]
  15021. vpxor xmm3, xmm3, xmm2
  15022. vpslldq xmm2, xmm3, 8
  15023. vpsrldq xmm3, xmm3, 8
  15024. vaesenc xmm11, xmm11, [rax+32]
  15025. vpxor xmm2, xmm2, xmm1
  15026. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  15027. vaesenc xmm11, xmm11, [rax+48]
  15028. vaesenc xmm11, xmm11, [rax+64]
  15029. vaesenc xmm11, xmm11, [rax+80]
  15030. vpshufd xmm2, xmm2, 78
  15031. vpxor xmm2, xmm2, xmm1
  15032. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  15033. vaesenc xmm11, xmm11, [rax+96]
  15034. vaesenc xmm11, xmm11, [rax+112]
  15035. vaesenc xmm11, xmm11, [rax+128]
  15036. vpshufd xmm2, xmm2, 78
  15037. vaesenc xmm11, xmm11, [rax+144]
  15038. vpxor xmm8, xmm8, xmm3
  15039. vpxor xmm2, xmm2, xmm8
  15040. vmovdqu xmm0, OWORD PTR [rax+160]
  15041. cmp r8d, 11
  15042. jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last
  15043. vaesenc xmm11, xmm11, xmm0
  15044. vaesenc xmm11, xmm11, [rax+176]
  15045. vmovdqu xmm0, OWORD PTR [rax+192]
  15046. cmp r8d, 13
  15047. jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last
  15048. vaesenc xmm11, xmm11, xmm0
  15049. vaesenc xmm11, xmm11, [rax+208]
  15050. vmovdqu xmm0, OWORD PTR [rax+224]
  15051. L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last:
  15052. vaesenclast xmm11, xmm11, xmm0
  15053. vpxor xmm6, xmm2, xmm1
  15054. vpxor xmm11, xmm11, xmm12
  15055. vmovdqu OWORD PTR [r10+rdi], xmm11
  15056. vpshufb xmm11, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15057. vpxor xmm6, xmm6, xmm11
  15058. add edi, 16
  15059. cmp edi, r15d
  15060. jl L_AES_GCM_encrypt_update_avx2_last_block_start
  15061. L_AES_GCM_encrypt_update_avx2_last_block_ghash:
  15062. ; ghash_gfmul_red
  15063. vpclmulqdq xmm10, xmm6, xmm5, 16
  15064. vpclmulqdq xmm9, xmm6, xmm5, 1
  15065. vpclmulqdq xmm8, xmm6, xmm5, 0
  15066. vpxor xmm10, xmm10, xmm9
  15067. vpslldq xmm9, xmm10, 8
  15068. vpsrldq xmm10, xmm10, 8
  15069. vpxor xmm9, xmm9, xmm8
  15070. vpclmulqdq xmm6, xmm6, xmm5, 17
  15071. vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  15072. vpshufd xmm9, xmm9, 78
  15073. vpxor xmm9, xmm9, xmm8
  15074. vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  15075. vpshufd xmm9, xmm9, 78
  15076. vpxor xmm6, xmm6, xmm10
  15077. vpxor xmm6, xmm6, xmm9
  15078. vpxor xmm6, xmm6, xmm8
  15079. L_AES_GCM_encrypt_update_avx2_last_block_done:
  15080. L_AES_GCM_encrypt_update_avx2_done_enc:
  15081. vmovdqu OWORD PTR [r12], xmm6
  15082. vmovdqu OWORD PTR [r14], xmm4
  15083. vzeroupper
  15084. vmovdqu xmm6, OWORD PTR [rsp+152]
  15085. vmovdqu xmm7, OWORD PTR [rsp+168]
  15086. vmovdqu xmm8, OWORD PTR [rsp+184]
  15087. vmovdqu xmm9, OWORD PTR [rsp+200]
  15088. vmovdqu xmm10, OWORD PTR [rsp+216]
  15089. vmovdqu xmm11, OWORD PTR [rsp+232]
  15090. vmovdqu xmm12, OWORD PTR [rsp+248]
  15091. vmovdqu xmm13, OWORD PTR [rsp+264]
  15092. vmovdqu xmm14, OWORD PTR [rsp+280]
  15093. vmovdqu xmm15, OWORD PTR [rsp+296]
  15094. add rsp, 312
  15095. pop rdi
  15096. pop r15
  15097. pop r14
  15098. pop r13
  15099. pop r12
  15100. ret
  15101. AES_GCM_encrypt_update_avx2 ENDP
  15102. _text ENDS
  15103. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_encrypt_final_avx2
  ;
  ; Finishes a GCM tag: folds the length block into the running GHASH
  ; value, performs one last GF(2^128) multiply by H, XORs in a 16-byte
  ; mask and stores the requested number of tag bytes.
  ;
  ; ABI:  Microsoft x64 (xmm6/xmm7, r12, r13 are saved and restored here).
  ; In:   rcx        -> 16-byte running GHASH value (read only)
  ;       rdx        -> destination buffer for the tag
  ;       r8d        =  number of tag bytes to store
  ;       r9d        =  byte count placed in the low qword of the length
  ;                     block (presumably the ciphertext length - confirm
  ;                     against the C prototype)
  ;       [rsp+40]   =  32-bit byte count placed in the high qword of the
  ;                     length block (presumably the AAD length)
  ;       [rsp+48]   -> 16-byte hash key H
  ;       [rsp+56]   -> 16-byte value XORed into the final result
  ;                     (presumably the encrypted initial counter)
  ;       (stack offsets are as seen by the caller at the call instruction
  ;        plus the return address, i.e. on entry to this procedure)
  ; Out:  tag bytes written to [rdx]; no register return value.
  ; Clobb: rax, r9, r10, r11, xmm0-xmm5, flags.
  ;
  ; NOTE(review): the partial-tag path copies from a 16-byte scratch slot
  ; at [rsp]; callers are presumably expected to pass 1..16 in r8d.
  ; A value of 0 now stores nothing; values above 16 are not checked.
  ; ---------------------------------------------------------------------
  AES_GCM_encrypt_final_avx2 PROC
        push    r12
        push    r13
        ; After the two pushes the stack arguments have moved up by 16:
        ; return address (8) + shadow space (32) + pushes (16) = 56.
        mov     eax, DWORD PTR [rsp+56]         ; 5th arg, zero-extended into rax
        mov     r10, QWORD PTR [rsp+64]         ; 6th arg: pointer to H
        mov     r11, QWORD PTR [rsp+72]         ; 7th arg: pointer to final XOR mask
        ; Frame: [rsp]    16-byte scratch for a partial tag
        ;        [rsp+16] saved xmm6
        ;        [rsp+32] saved xmm7
        sub     rsp, 48
        vmovdqu OWORD PTR [rsp+16], xmm6
        vmovdqu OWORD PTR [rsp+32], xmm7
        vmovdqu xmm4, OWORD PTR [rcx]           ; running GHASH value
        vmovdqu xmm5, OWORD PTR [r10]           ; H
        vmovdqu xmm6, OWORD PTR [r11]           ; mask XORed in at the very end
        ; Shift H left by one bit across the full 128 bits and, when the
        ; bit shifted out of position 127 was set, XOR in the mod2_128
        ; reduction constant.
        vpsrlq  xmm1, xmm5, 63                  ; top bit of each qword
        vpsllq  xmm0, xmm5, 1                   ; each qword << 1
        vpslldq xmm1, xmm1, 8                   ; carry of low qword -> high qword
        vpor    xmm0, xmm0, xmm1                ; 128-bit H << 1
        vpshufd xmm5, xmm5, 255                 ; broadcast top dword of H
        vpsrad  xmm5, xmm5, 31                  ; all-ones if bit 127 was set
        vpand   xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
        vpxor   xmm5, xmm5, xmm0
        ; calc_tag
        ; The 4th argument is a 32-bit count; the Windows x64 ABI leaves
        ; the upper half of r9 undefined, so clear it before the 64-bit
        ; shift to keep stray bits out of the length block.
        mov     r9d, r9d
        shl     r9, 3                           ; bytes -> bits
        shl     rax, 3                          ; bytes -> bits
        vmovq   xmm0, r9
        vmovq   xmm1, rax
        vpunpcklqdq xmm0, xmm0, xmm1            ; xmm0 = { low: r9 bits, high: rax bits }
        vpxor   xmm0, xmm0, xmm4                ; fold lengths into GHASH value
        ; ghash_gfmul_red
        ; 128x128 carry-less multiply of xmm0 by xmm5 from four partial
        ; products, followed by a two-step fold with mod2_128.
        vpclmulqdq xmm7, xmm0, xmm5, 16
        vpclmulqdq xmm3, xmm0, xmm5, 1
        vpclmulqdq xmm2, xmm0, xmm5, 0          ; low x low
        vpxor   xmm7, xmm7, xmm3                ; combined middle terms
        vpslldq xmm3, xmm7, 8                   ; middle -> upper half of low product
        vpsrldq xmm7, xmm7, 8                   ; middle -> lower half of high product
        vpxor   xmm3, xmm3, xmm2                ; low 128 bits of the product
        vpclmulqdq xmm0, xmm0, xmm5, 17         ; high x high
        vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vpshufd xmm3, xmm3, 78                  ; swap qwords
        vpxor   xmm3, xmm3, xmm2
        vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vpshufd xmm3, xmm3, 78                  ; swap qwords
        vpxor   xmm0, xmm0, xmm7
        vpxor   xmm0, xmm0, xmm3
        vpxor   xmm0, xmm0, xmm2                ; reduced 128-bit result
        vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor   xmm0, xmm0, xmm6                ; final tag value
        ; store_tag
        cmp     r8d, 16
        je      L_AES_GCM_encrypt_final_avx2_store_tag_16
        ; The copy loop below tests its exit condition after the first
        ; byte, so a zero length must be rejected up front.
        test    r8d, r8d
        jz      L_AES_GCM_encrypt_final_avx2_store_tag_done
        xor     r12, r12                        ; byte index
        vmovdqu OWORD PTR [rsp], xmm0           ; spill tag so it can be read bytewise
  L_AES_GCM_encrypt_final_avx2_store_tag_loop:
        movzx   r13d, BYTE PTR [rsp+r12]
        mov     BYTE PTR [rdx+r12], r13b
        inc     r12d
        cmp     r12d, r8d
        jne     L_AES_GCM_encrypt_final_avx2_store_tag_loop
        jmp     L_AES_GCM_encrypt_final_avx2_store_tag_done
  L_AES_GCM_encrypt_final_avx2_store_tag_16:
        vmovdqu OWORD PTR [rdx], xmm0           ; full-length tag in one store
  L_AES_GCM_encrypt_final_avx2_store_tag_done:
        vzeroupper
        vmovdqu xmm6, OWORD PTR [rsp+16]
        vmovdqu xmm7, OWORD PTR [rsp+32]
        add     rsp, 48
        pop     r13
        pop     r12
        ret
  AES_GCM_encrypt_final_avx2 ENDP
  15173. _text ENDS
  15174. _text SEGMENT READONLY PARA
  15175. AES_GCM_decrypt_update_avx2 PROC
  15176. push r13
  15177. push r12
  15178. push r14
  15179. push r15
  15180. push rdi
  15181. mov rax, rcx
  15182. mov r10, r8
  15183. mov r8d, edx
  15184. mov r11, r9
  15185. mov r9d, DWORD PTR [rsp+80]
  15186. mov r12, QWORD PTR [rsp+88]
  15187. mov r14, QWORD PTR [rsp+96]
  15188. mov r15, QWORD PTR [rsp+104]
  15189. sub rsp, 328
  15190. vmovdqu OWORD PTR [rsp+168], xmm6
  15191. vmovdqu OWORD PTR [rsp+184], xmm7
  15192. vmovdqu OWORD PTR [rsp+200], xmm8
  15193. vmovdqu OWORD PTR [rsp+216], xmm9
  15194. vmovdqu OWORD PTR [rsp+232], xmm10
  15195. vmovdqu OWORD PTR [rsp+248], xmm11
  15196. vmovdqu OWORD PTR [rsp+264], xmm12
  15197. vmovdqu OWORD PTR [rsp+280], xmm13
  15198. vmovdqu OWORD PTR [rsp+296], xmm14
  15199. vmovdqu OWORD PTR [rsp+312], xmm15
  15200. vmovdqu xmm6, OWORD PTR [r12]
  15201. vmovdqu xmm5, OWORD PTR [r14]
  15202. vmovdqu xmm4, OWORD PTR [r15]
  15203. ; Calculate H
  15204. vpsrlq xmm1, xmm5, 63
  15205. vpsllq xmm0, xmm5, 1
  15206. vpslldq xmm1, xmm1, 8
  15207. vpor xmm0, xmm0, xmm1
  15208. vpshufd xmm5, xmm5, 255
  15209. vpsrad xmm5, xmm5, 31
  15210. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  15211. vpxor xmm5, xmm5, xmm0
  15212. xor edi, edi
  15213. cmp r9d, 128
  15214. mov r13d, r9d
  15215. jl L_AES_GCM_decrypt_update_avx2_done_128
  15216. and r13d, 4294967168
  15217. vmovdqu OWORD PTR [rsp+128], xmm4
  15218. vmovdqu OWORD PTR [rsp+144], xmm15
  15219. vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
  15220. ; H ^ 1 and H ^ 2
  15221. vpclmulqdq xmm9, xmm5, xmm5, 0
  15222. vpclmulqdq xmm10, xmm5, xmm5, 17
  15223. vpclmulqdq xmm8, xmm9, xmm3, 16
  15224. vpshufd xmm9, xmm9, 78
  15225. vpxor xmm9, xmm9, xmm8
  15226. vpclmulqdq xmm8, xmm9, xmm3, 16
  15227. vpshufd xmm9, xmm9, 78
  15228. vpxor xmm9, xmm9, xmm8
  15229. vpxor xmm0, xmm10, xmm9
  15230. vmovdqu OWORD PTR [rsp], xmm5
  15231. vmovdqu OWORD PTR [rsp+16], xmm0
  15232. ; H ^ 3 and H ^ 4
  15233. vpclmulqdq xmm11, xmm0, xmm5, 16
  15234. vpclmulqdq xmm10, xmm0, xmm5, 1
  15235. vpclmulqdq xmm9, xmm0, xmm5, 0
  15236. vpclmulqdq xmm12, xmm0, xmm5, 17
  15237. vpclmulqdq xmm13, xmm0, xmm0, 0
  15238. vpclmulqdq xmm14, xmm0, xmm0, 17
  15239. vpxor xmm11, xmm11, xmm10
  15240. vpslldq xmm10, xmm11, 8
  15241. vpsrldq xmm11, xmm11, 8
  15242. vpxor xmm10, xmm10, xmm9
  15243. vpclmulqdq xmm8, xmm13, xmm3, 16
  15244. vpclmulqdq xmm9, xmm10, xmm3, 16
  15245. vpshufd xmm10, xmm10, 78
  15246. vpshufd xmm13, xmm13, 78
  15247. vpxor xmm10, xmm10, xmm9
  15248. vpxor xmm13, xmm13, xmm8
  15249. vpclmulqdq xmm9, xmm10, xmm3, 16
  15250. vpclmulqdq xmm8, xmm13, xmm3, 16
  15251. vpshufd xmm10, xmm10, 78
  15252. vpshufd xmm13, xmm13, 78
  15253. vpxor xmm12, xmm12, xmm11
  15254. vpxor xmm13, xmm13, xmm8
  15255. vpxor xmm10, xmm10, xmm12
  15256. vpxor xmm2, xmm13, xmm14
  15257. vpxor xmm1, xmm10, xmm9
  15258. vmovdqu OWORD PTR [rsp+32], xmm1
  15259. vmovdqu OWORD PTR [rsp+48], xmm2
  15260. ; H ^ 5 and H ^ 6
  15261. vpclmulqdq xmm11, xmm1, xmm0, 16
  15262. vpclmulqdq xmm10, xmm1, xmm0, 1
  15263. vpclmulqdq xmm9, xmm1, xmm0, 0
  15264. vpclmulqdq xmm12, xmm1, xmm0, 17
  15265. vpclmulqdq xmm13, xmm1, xmm1, 0
  15266. vpclmulqdq xmm14, xmm1, xmm1, 17
  15267. vpxor xmm11, xmm11, xmm10
  15268. vpslldq xmm10, xmm11, 8
  15269. vpsrldq xmm11, xmm11, 8
  15270. vpxor xmm10, xmm10, xmm9
  15271. vpclmulqdq xmm8, xmm13, xmm3, 16
  15272. vpclmulqdq xmm9, xmm10, xmm3, 16
  15273. vpshufd xmm10, xmm10, 78
  15274. vpshufd xmm13, xmm13, 78
  15275. vpxor xmm10, xmm10, xmm9
  15276. vpxor xmm13, xmm13, xmm8
  15277. vpclmulqdq xmm9, xmm10, xmm3, 16
  15278. vpclmulqdq xmm8, xmm13, xmm3, 16
  15279. vpshufd xmm10, xmm10, 78
  15280. vpshufd xmm13, xmm13, 78
  15281. vpxor xmm12, xmm12, xmm11
  15282. vpxor xmm13, xmm13, xmm8
  15283. vpxor xmm10, xmm10, xmm12
  15284. vpxor xmm0, xmm13, xmm14
  15285. vpxor xmm7, xmm10, xmm9
  15286. vmovdqu OWORD PTR [rsp+64], xmm7
  15287. vmovdqu OWORD PTR [rsp+80], xmm0
  15288. ; H ^ 7 and H ^ 8
  15289. vpclmulqdq xmm11, xmm2, xmm1, 16
  15290. vpclmulqdq xmm10, xmm2, xmm1, 1
  15291. vpclmulqdq xmm9, xmm2, xmm1, 0
  15292. vpclmulqdq xmm12, xmm2, xmm1, 17
  15293. vpclmulqdq xmm13, xmm2, xmm2, 0
  15294. vpclmulqdq xmm14, xmm2, xmm2, 17
  15295. vpxor xmm11, xmm11, xmm10
  15296. vpslldq xmm10, xmm11, 8
  15297. vpsrldq xmm11, xmm11, 8
  15298. vpxor xmm10, xmm10, xmm9
  15299. vpclmulqdq xmm8, xmm13, xmm3, 16
  15300. vpclmulqdq xmm9, xmm10, xmm3, 16
  15301. vpshufd xmm10, xmm10, 78
  15302. vpshufd xmm13, xmm13, 78
  15303. vpxor xmm10, xmm10, xmm9
  15304. vpxor xmm13, xmm13, xmm8
  15305. vpclmulqdq xmm9, xmm10, xmm3, 16
  15306. vpclmulqdq xmm8, xmm13, xmm3, 16
  15307. vpshufd xmm10, xmm10, 78
  15308. vpshufd xmm13, xmm13, 78
  15309. vpxor xmm12, xmm12, xmm11
  15310. vpxor xmm13, xmm13, xmm8
  15311. vpxor xmm10, xmm10, xmm12
  15312. vpxor xmm0, xmm13, xmm14
  15313. vpxor xmm7, xmm10, xmm9
  15314. vmovdqu OWORD PTR [rsp+96], xmm7
  15315. vmovdqu OWORD PTR [rsp+112], xmm0
  15316. L_AES_GCM_decrypt_update_avx2_ghash_128:
  15317. ; aesenc_128_ghash
  15318. lea rcx, QWORD PTR [r11+rdi]
  15319. lea rdx, QWORD PTR [r10+rdi]
  15320. ; aesenc_ctr
  15321. vmovdqu xmm0, OWORD PTR [rsp+128]
  15322. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  15323. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  15324. vpshufb xmm8, xmm0, xmm1
  15325. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  15326. vpshufb xmm9, xmm9, xmm1
  15327. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  15328. vpshufb xmm10, xmm10, xmm1
  15329. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  15330. vpshufb xmm11, xmm11, xmm1
  15331. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  15332. vpshufb xmm12, xmm12, xmm1
  15333. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  15334. vpshufb xmm13, xmm13, xmm1
  15335. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  15336. vpshufb xmm14, xmm14, xmm1
  15337. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  15338. vpshufb xmm15, xmm15, xmm1
  15339. ; aesenc_xor
  15340. vmovdqu xmm7, OWORD PTR [rax]
  15341. vmovdqu OWORD PTR [rsp+128], xmm0
  15342. vpxor xmm8, xmm8, xmm7
  15343. vpxor xmm9, xmm9, xmm7
  15344. vpxor xmm10, xmm10, xmm7
  15345. vpxor xmm11, xmm11, xmm7
  15346. vpxor xmm12, xmm12, xmm7
  15347. vpxor xmm13, xmm13, xmm7
  15348. vpxor xmm14, xmm14, xmm7
  15349. vpxor xmm15, xmm15, xmm7
  15350. ; aesenc_pclmul_1
  15351. vmovdqu xmm1, OWORD PTR [rcx]
  15352. vmovdqu xmm0, OWORD PTR [rax+16]
  15353. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15354. vmovdqu xmm2, OWORD PTR [rsp+112]
  15355. vpxor xmm1, xmm1, xmm6
  15356. vpclmulqdq xmm5, xmm1, xmm2, 16
  15357. vpclmulqdq xmm3, xmm1, xmm2, 1
  15358. vpclmulqdq xmm6, xmm1, xmm2, 0
  15359. vpclmulqdq xmm7, xmm1, xmm2, 17
  15360. vaesenc xmm8, xmm8, xmm0
  15361. vaesenc xmm9, xmm9, xmm0
  15362. vaesenc xmm10, xmm10, xmm0
  15363. vaesenc xmm11, xmm11, xmm0
  15364. vaesenc xmm12, xmm12, xmm0
  15365. vaesenc xmm13, xmm13, xmm0
  15366. vaesenc xmm14, xmm14, xmm0
  15367. vaesenc xmm15, xmm15, xmm0
  15368. ; aesenc_pclmul_2
  15369. vmovdqu xmm1, OWORD PTR [rcx+16]
  15370. vmovdqu xmm0, OWORD PTR [rsp+96]
  15371. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15372. vpxor xmm5, xmm5, xmm3
  15373. vpclmulqdq xmm2, xmm1, xmm0, 16
  15374. vpclmulqdq xmm3, xmm1, xmm0, 1
  15375. vpclmulqdq xmm4, xmm1, xmm0, 0
  15376. vpclmulqdq xmm1, xmm1, xmm0, 17
  15377. vmovdqu xmm0, OWORD PTR [rax+32]
  15378. vpxor xmm7, xmm7, xmm1
  15379. vaesenc xmm8, xmm8, xmm0
  15380. vaesenc xmm9, xmm9, xmm0
  15381. vaesenc xmm10, xmm10, xmm0
  15382. vaesenc xmm11, xmm11, xmm0
  15383. vaesenc xmm12, xmm12, xmm0
  15384. vaesenc xmm13, xmm13, xmm0
  15385. vaesenc xmm14, xmm14, xmm0
  15386. vaesenc xmm15, xmm15, xmm0
  15387. ; aesenc_pclmul_n
  15388. vmovdqu xmm1, OWORD PTR [rcx+32]
  15389. vmovdqu xmm0, OWORD PTR [rsp+80]
  15390. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15391. vpxor xmm5, xmm5, xmm2
  15392. vpclmulqdq xmm2, xmm1, xmm0, 16
  15393. vpxor xmm5, xmm5, xmm3
  15394. vpclmulqdq xmm3, xmm1, xmm0, 1
  15395. vpxor xmm6, xmm6, xmm4
  15396. vpclmulqdq xmm4, xmm1, xmm0, 0
  15397. vpclmulqdq xmm1, xmm1, xmm0, 17
  15398. vmovdqu xmm0, OWORD PTR [rax+48]
  15399. vpxor xmm7, xmm7, xmm1
  15400. vaesenc xmm8, xmm8, xmm0
  15401. vaesenc xmm9, xmm9, xmm0
  15402. vaesenc xmm10, xmm10, xmm0
  15403. vaesenc xmm11, xmm11, xmm0
  15404. vaesenc xmm12, xmm12, xmm0
  15405. vaesenc xmm13, xmm13, xmm0
  15406. vaesenc xmm14, xmm14, xmm0
  15407. vaesenc xmm15, xmm15, xmm0
  15408. ; aesenc_pclmul_n
  15409. vmovdqu xmm1, OWORD PTR [rcx+48]
  15410. vmovdqu xmm0, OWORD PTR [rsp+64]
  15411. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15412. vpxor xmm5, xmm5, xmm2
  15413. vpclmulqdq xmm2, xmm1, xmm0, 16
  15414. vpxor xmm5, xmm5, xmm3
  15415. vpclmulqdq xmm3, xmm1, xmm0, 1
  15416. vpxor xmm6, xmm6, xmm4
  15417. vpclmulqdq xmm4, xmm1, xmm0, 0
  15418. vpclmulqdq xmm1, xmm1, xmm0, 17
  15419. vmovdqu xmm0, OWORD PTR [rax+64]
  15420. vpxor xmm7, xmm7, xmm1
  15421. vaesenc xmm8, xmm8, xmm0
  15422. vaesenc xmm9, xmm9, xmm0
  15423. vaesenc xmm10, xmm10, xmm0
  15424. vaesenc xmm11, xmm11, xmm0
  15425. vaesenc xmm12, xmm12, xmm0
  15426. vaesenc xmm13, xmm13, xmm0
  15427. vaesenc xmm14, xmm14, xmm0
  15428. vaesenc xmm15, xmm15, xmm0
  15429. ; aesenc_pclmul_n
  15430. vmovdqu xmm1, OWORD PTR [rcx+64]
  15431. vmovdqu xmm0, OWORD PTR [rsp+48]
  15432. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15433. vpxor xmm5, xmm5, xmm2
  15434. vpclmulqdq xmm2, xmm1, xmm0, 16
  15435. vpxor xmm5, xmm5, xmm3
  15436. vpclmulqdq xmm3, xmm1, xmm0, 1
  15437. vpxor xmm6, xmm6, xmm4
  15438. vpclmulqdq xmm4, xmm1, xmm0, 0
  15439. vpclmulqdq xmm1, xmm1, xmm0, 17
  15440. vmovdqu xmm0, OWORD PTR [rax+80]
  15441. vpxor xmm7, xmm7, xmm1
  15442. vaesenc xmm8, xmm8, xmm0
  15443. vaesenc xmm9, xmm9, xmm0
  15444. vaesenc xmm10, xmm10, xmm0
  15445. vaesenc xmm11, xmm11, xmm0
  15446. vaesenc xmm12, xmm12, xmm0
  15447. vaesenc xmm13, xmm13, xmm0
  15448. vaesenc xmm14, xmm14, xmm0
  15449. vaesenc xmm15, xmm15, xmm0
  15450. ; aesenc_pclmul_n
  15451. vmovdqu xmm1, OWORD PTR [rcx+80]
  15452. vmovdqu xmm0, OWORD PTR [rsp+32]
  15453. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15454. vpxor xmm5, xmm5, xmm2
  15455. vpclmulqdq xmm2, xmm1, xmm0, 16
  15456. vpxor xmm5, xmm5, xmm3
  15457. vpclmulqdq xmm3, xmm1, xmm0, 1
  15458. vpxor xmm6, xmm6, xmm4
  15459. vpclmulqdq xmm4, xmm1, xmm0, 0
  15460. vpclmulqdq xmm1, xmm1, xmm0, 17
  15461. vmovdqu xmm0, OWORD PTR [rax+96]
  15462. vpxor xmm7, xmm7, xmm1
  15463. vaesenc xmm8, xmm8, xmm0
  15464. vaesenc xmm9, xmm9, xmm0
  15465. vaesenc xmm10, xmm10, xmm0
  15466. vaesenc xmm11, xmm11, xmm0
  15467. vaesenc xmm12, xmm12, xmm0
  15468. vaesenc xmm13, xmm13, xmm0
  15469. vaesenc xmm14, xmm14, xmm0
  15470. vaesenc xmm15, xmm15, xmm0
  15471. ; aesenc_pclmul_n
  15472. vmovdqu xmm1, OWORD PTR [rcx+96]
  15473. vmovdqu xmm0, OWORD PTR [rsp+16]
  15474. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15475. vpxor xmm5, xmm5, xmm2
  15476. vpclmulqdq xmm2, xmm1, xmm0, 16
  15477. vpxor xmm5, xmm5, xmm3
  15478. vpclmulqdq xmm3, xmm1, xmm0, 1
  15479. vpxor xmm6, xmm6, xmm4
  15480. vpclmulqdq xmm4, xmm1, xmm0, 0
  15481. vpclmulqdq xmm1, xmm1, xmm0, 17
  15482. vmovdqu xmm0, OWORD PTR [rax+112]
  15483. vpxor xmm7, xmm7, xmm1
  15484. vaesenc xmm8, xmm8, xmm0
  15485. vaesenc xmm9, xmm9, xmm0
  15486. vaesenc xmm10, xmm10, xmm0
  15487. vaesenc xmm11, xmm11, xmm0
  15488. vaesenc xmm12, xmm12, xmm0
  15489. vaesenc xmm13, xmm13, xmm0
  15490. vaesenc xmm14, xmm14, xmm0
  15491. vaesenc xmm15, xmm15, xmm0
  15492. ; aesenc_pclmul_n
  15493. vmovdqu xmm1, OWORD PTR [rcx+112]
  15494. vmovdqu xmm0, OWORD PTR [rsp]
  15495. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15496. vpxor xmm5, xmm5, xmm2
  15497. vpclmulqdq xmm2, xmm1, xmm0, 16
  15498. vpxor xmm5, xmm5, xmm3
  15499. vpclmulqdq xmm3, xmm1, xmm0, 1
  15500. vpxor xmm6, xmm6, xmm4
  15501. vpclmulqdq xmm4, xmm1, xmm0, 0
  15502. vpclmulqdq xmm1, xmm1, xmm0, 17
  15503. vmovdqu xmm0, OWORD PTR [rax+128]
  15504. vpxor xmm7, xmm7, xmm1
  15505. vaesenc xmm8, xmm8, xmm0
  15506. vaesenc xmm9, xmm9, xmm0
  15507. vaesenc xmm10, xmm10, xmm0
  15508. vaesenc xmm11, xmm11, xmm0
  15509. vaesenc xmm12, xmm12, xmm0
  15510. vaesenc xmm13, xmm13, xmm0
  15511. vaesenc xmm14, xmm14, xmm0
  15512. vaesenc xmm15, xmm15, xmm0
  15513. ; aesenc_pclmul_l
  15514. vpxor xmm5, xmm5, xmm2
  15515. vpxor xmm6, xmm6, xmm4
  15516. vpxor xmm5, xmm5, xmm3
  15517. vpslldq xmm1, xmm5, 8
  15518. vpsrldq xmm5, xmm5, 8
  15519. vmovdqu xmm4, OWORD PTR [rax+144]
  15520. vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
  15521. vaesenc xmm8, xmm8, xmm4
  15522. vpxor xmm6, xmm6, xmm1
  15523. vpxor xmm7, xmm7, xmm5
  15524. vpclmulqdq xmm3, xmm6, xmm0, 16
  15525. vaesenc xmm9, xmm9, xmm4
  15526. vaesenc xmm10, xmm10, xmm4
  15527. vaesenc xmm11, xmm11, xmm4
  15528. vpshufd xmm6, xmm6, 78
  15529. vpxor xmm6, xmm6, xmm3
  15530. vpclmulqdq xmm3, xmm6, xmm0, 16
  15531. vaesenc xmm12, xmm12, xmm4
  15532. vaesenc xmm13, xmm13, xmm4
  15533. vaesenc xmm14, xmm14, xmm4
  15534. vpshufd xmm6, xmm6, 78
  15535. vpxor xmm6, xmm6, xmm3
  15536. vpxor xmm6, xmm6, xmm7
  15537. vaesenc xmm15, xmm15, xmm4
  15538. cmp r8d, 11
  15539. vmovdqu xmm7, OWORD PTR [rax+160]
  15540. jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done
  15541. vaesenc xmm8, xmm8, xmm7
  15542. vaesenc xmm9, xmm9, xmm7
  15543. vaesenc xmm10, xmm10, xmm7
  15544. vaesenc xmm11, xmm11, xmm7
  15545. vaesenc xmm12, xmm12, xmm7
  15546. vaesenc xmm13, xmm13, xmm7
  15547. vaesenc xmm14, xmm14, xmm7
  15548. vaesenc xmm15, xmm15, xmm7
  15549. vmovdqu xmm7, OWORD PTR [rax+176]
  15550. vaesenc xmm8, xmm8, xmm7
  15551. vaesenc xmm9, xmm9, xmm7
  15552. vaesenc xmm10, xmm10, xmm7
  15553. vaesenc xmm11, xmm11, xmm7
  15554. vaesenc xmm12, xmm12, xmm7
  15555. vaesenc xmm13, xmm13, xmm7
  15556. vaesenc xmm14, xmm14, xmm7
  15557. vaesenc xmm15, xmm15, xmm7
  15558. cmp r8d, 13
  15559. vmovdqu xmm7, OWORD PTR [rax+192]
  15560. jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done
  15561. vaesenc xmm8, xmm8, xmm7
  15562. vaesenc xmm9, xmm9, xmm7
  15563. vaesenc xmm10, xmm10, xmm7
  15564. vaesenc xmm11, xmm11, xmm7
  15565. vaesenc xmm12, xmm12, xmm7
  15566. vaesenc xmm13, xmm13, xmm7
  15567. vaesenc xmm14, xmm14, xmm7
  15568. vaesenc xmm15, xmm15, xmm7
  15569. vmovdqu xmm7, OWORD PTR [rax+208]
  15570. vaesenc xmm8, xmm8, xmm7
  15571. vaesenc xmm9, xmm9, xmm7
  15572. vaesenc xmm10, xmm10, xmm7
  15573. vaesenc xmm11, xmm11, xmm7
  15574. vaesenc xmm12, xmm12, xmm7
  15575. vaesenc xmm13, xmm13, xmm7
  15576. vaesenc xmm14, xmm14, xmm7
  15577. vaesenc xmm15, xmm15, xmm7
  15578. vmovdqu xmm7, OWORD PTR [rax+224]
  15579. L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done:
  15580. ; aesenc_last
  15581. vaesenclast xmm8, xmm8, xmm7
  15582. vaesenclast xmm9, xmm9, xmm7
  15583. vaesenclast xmm10, xmm10, xmm7
  15584. vaesenclast xmm11, xmm11, xmm7
  15585. vmovdqu xmm0, OWORD PTR [rcx]
  15586. vmovdqu xmm1, OWORD PTR [rcx+16]
  15587. vmovdqu xmm2, OWORD PTR [rcx+32]
  15588. vmovdqu xmm3, OWORD PTR [rcx+48]
  15589. vpxor xmm8, xmm8, xmm0
  15590. vpxor xmm9, xmm9, xmm1
  15591. vpxor xmm10, xmm10, xmm2
  15592. vpxor xmm11, xmm11, xmm3
  15593. vmovdqu OWORD PTR [rdx], xmm8
  15594. vmovdqu OWORD PTR [rdx+16], xmm9
  15595. vmovdqu OWORD PTR [rdx+32], xmm10
  15596. vmovdqu OWORD PTR [rdx+48], xmm11
  15597. vaesenclast xmm12, xmm12, xmm7
  15598. vaesenclast xmm13, xmm13, xmm7
  15599. vaesenclast xmm14, xmm14, xmm7
  15600. vaesenclast xmm15, xmm15, xmm7
  15601. vmovdqu xmm0, OWORD PTR [rcx+64]
  15602. vmovdqu xmm1, OWORD PTR [rcx+80]
  15603. vmovdqu xmm2, OWORD PTR [rcx+96]
  15604. vmovdqu xmm3, OWORD PTR [rcx+112]
  15605. vpxor xmm12, xmm12, xmm0
  15606. vpxor xmm13, xmm13, xmm1
  15607. vpxor xmm14, xmm14, xmm2
  15608. vpxor xmm15, xmm15, xmm3
  15609. vmovdqu OWORD PTR [rdx+64], xmm12
  15610. vmovdqu OWORD PTR [rdx+80], xmm13
  15611. vmovdqu OWORD PTR [rdx+96], xmm14
  15612. vmovdqu OWORD PTR [rdx+112], xmm15
  15613. ; aesenc_128_ghash - end
  15614. add edi, 128
  15615. cmp edi, r13d
  15616. jl L_AES_GCM_decrypt_update_avx2_ghash_128
  15617. vmovdqu xmm5, OWORD PTR [rsp]
  15618. vmovdqu xmm4, OWORD PTR [rsp+128]
  15619. vmovdqu xmm15, OWORD PTR [rsp+144]
  15620. L_AES_GCM_decrypt_update_avx2_done_128:
  15621. cmp edi, r9d
  15622. jge L_AES_GCM_decrypt_update_avx2_done_dec
  15623. mov r13d, r9d
  15624. and r13d, 4294967280
  15625. cmp edi, r13d
  15626. jge L_AES_GCM_decrypt_update_avx2_last_block_done
  15627. L_AES_GCM_decrypt_update_avx2_last_block_start:
  15628. vmovdqu xmm11, OWORD PTR [r11+rdi]
  15629. vpshufb xmm10, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  15630. vpshufb xmm12, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
  15631. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  15632. vpxor xmm12, xmm12, xmm6
  15633. ; aesenc_gfmul_sb
  15634. vpclmulqdq xmm2, xmm12, xmm5, 1
  15635. vpclmulqdq xmm3, xmm12, xmm5, 16
  15636. vpclmulqdq xmm1, xmm12, xmm5, 0
  15637. vpclmulqdq xmm8, xmm12, xmm5, 17
  15638. vpxor xmm10, xmm10, [rax]
  15639. vaesenc xmm10, xmm10, [rax+16]
  15640. vpxor xmm3, xmm3, xmm2
  15641. vpslldq xmm2, xmm3, 8
  15642. vpsrldq xmm3, xmm3, 8
  15643. vaesenc xmm10, xmm10, [rax+32]
  15644. vpxor xmm2, xmm2, xmm1
  15645. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  15646. vaesenc xmm10, xmm10, [rax+48]
  15647. vaesenc xmm10, xmm10, [rax+64]
  15648. vaesenc xmm10, xmm10, [rax+80]
  15649. vpshufd xmm2, xmm2, 78
  15650. vpxor xmm2, xmm2, xmm1
  15651. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  15652. vaesenc xmm10, xmm10, [rax+96]
  15653. vaesenc xmm10, xmm10, [rax+112]
  15654. vaesenc xmm10, xmm10, [rax+128]
  15655. vpshufd xmm2, xmm2, 78
  15656. vaesenc xmm10, xmm10, [rax+144]
  15657. vpxor xmm8, xmm8, xmm3
  15658. vpxor xmm2, xmm2, xmm8
  15659. vmovdqu xmm0, OWORD PTR [rax+160]
  15660. cmp r8d, 11
  15661. jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last
  15662. vaesenc xmm10, xmm10, xmm0
  15663. vaesenc xmm10, xmm10, [rax+176]
  15664. vmovdqu xmm0, OWORD PTR [rax+192]
  15665. cmp r8d, 13
  15666. jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last
  15667. vaesenc xmm10, xmm10, xmm0
  15668. vaesenc xmm10, xmm10, [rax+208]
  15669. vmovdqu xmm0, OWORD PTR [rax+224]
  15670. L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last:
  15671. vaesenclast xmm10, xmm10, xmm0
  15672. vpxor xmm6, xmm2, xmm1
  15673. vpxor xmm10, xmm10, xmm11
  15674. vmovdqu OWORD PTR [r10+rdi], xmm10
  15675. add edi, 16
  15676. cmp edi, r13d
  15677. jl L_AES_GCM_decrypt_update_avx2_last_block_start
  15678. L_AES_GCM_decrypt_update_avx2_last_block_done:
  15679. L_AES_GCM_decrypt_update_avx2_done_dec:
  15680. vmovdqu OWORD PTR [r12], xmm6
  15681. vmovdqu OWORD PTR [r15], xmm4
  15682. vzeroupper
  15683. vmovdqu xmm6, OWORD PTR [rsp+168]
  15684. vmovdqu xmm7, OWORD PTR [rsp+184]
  15685. vmovdqu xmm8, OWORD PTR [rsp+200]
  15686. vmovdqu xmm9, OWORD PTR [rsp+216]
  15687. vmovdqu xmm10, OWORD PTR [rsp+232]
  15688. vmovdqu xmm11, OWORD PTR [rsp+248]
  15689. vmovdqu xmm12, OWORD PTR [rsp+264]
  15690. vmovdqu xmm13, OWORD PTR [rsp+280]
  15691. vmovdqu xmm14, OWORD PTR [rsp+296]
  15692. vmovdqu xmm15, OWORD PTR [rsp+312]
  15693. add rsp, 328
  15694. pop rdi
  15695. pop r15
  15696. pop r14
  15697. pop r12
  15698. pop r13
  15699. ret
  15700. AES_GCM_decrypt_update_avx2 ENDP
  15701. _text ENDS
  15702. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_decrypt_final_avx2
  ;
  ; Finishes a tag computation and compares the result with a caller-supplied
  ; tag: a 16-byte state is XORed with a block built from two length values,
  ; multiplied by a 16-byte key in the carry-less (GHASH-style) field, byte
  ; swapped, XORed with a 16-byte mask, and then compared against the bytes at
  ; rdx. 1 (match) or 0 (mismatch) is stored through the result pointer.
  ;
  ; Inputs as consumed below (the register/stack layout matches the Microsoft
  ; x64 convention; stack offsets are relative to rsp at entry):
  ;   rcx      -> 16-byte state (presumably the running GHASH value - confirm)
  ;   rdx      -> tag bytes to compare against
  ;   r8d      =  number of tag bytes to compare
  ;   r9       =  first length value (multiplied by 8 below)
  ;   [rsp+40] =  second length value, read as 32 bits (multiplied by 8 below)
  ;   [rsp+48] -> 16-byte multiplier (presumably the hash key H - confirm)
  ;   [rsp+56] -> 16 bytes XORed into the product (presumably the encrypted
  ;               initial counter block - confirm)
  ;   [rsp+64] -> 32-bit result written at the end
  ;
  ; Preserves r12-r14 and xmm6-xmm7 (saved and restored here).
  ; Overwrites rax, r9, r10, r11, xmm0-xmm5 and the flags.
  ; Uses 48 bytes of stack: [rsp] scratch tag, [rsp+16]/[rsp+32] xmm6/xmm7.
  ; ---------------------------------------------------------------------------
  15703. AES_GCM_decrypt_final_avx2 PROC
  15704. push r12
  15705. push r13
  15706. push r14
  ; Three pushes moved rsp down by 24, so entry offsets 40/48/56/64 are now
  ; at 64/72/80/88.
  15707. mov eax, DWORD PTR [rsp+64]    ; second length value; 32-bit load zero-extends rax
  15708. mov r10, QWORD PTR [rsp+72]    ; pointer to 16-byte multiplier
  15709. mov r11, QWORD PTR [rsp+80]    ; pointer to 16-byte final XOR mask
  15710. mov r12, QWORD PTR [rsp+88]    ; pointer to 32-bit result
  15711. sub rsp, 48
  15712. vmovdqu OWORD PTR [rsp+16], xmm6    ; save xmm6/xmm7 before they are overwritten
  15713. vmovdqu OWORD PTR [rsp+32], xmm7
  15714. vmovdqu xmm4, OWORD PTR [rcx]    ; xmm4 = 16-byte state
  15715. vmovdqu xmm5, OWORD PTR [r10]    ; xmm5 = multiplier
  15716. vmovdqu xmm6, OWORD PTR [r11]    ; xmm6 = final XOR mask
  ; xmm5 = (xmm5 << 1) as a 128-bit value, XORed with the mod2_128 constant
  ; when the bit shifted out of the top was set.
  15717. vpsrlq xmm1, xmm5, 63    ; top bit of each qword -> bit 0
  15718. vpsllq xmm0, xmm5, 1    ; each qword shifted left by one
  15719. vpslldq xmm1, xmm1, 8    ; carry of the low qword moves into the high qword
  15720. vpor xmm0, xmm0, xmm1    ; xmm0 = 128-bit left shift by one
  15721. vpshufd xmm5, xmm5, 255    ; broadcast the top dword
  15722. vpsrad xmm5, xmm5, 31    ; all-ones if the original top bit was set, else zero
  15723. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  15724. vpxor xmm5, xmm5, xmm0
  15725. ; calc_tag
  ; Build the length block: low qword = r9 * 8, high qword = rax * 8
  ; (presumably byte counts converted to bit counts - confirm), then XOR it
  ; into the state.
  ; NOTE(review): r9 is shifted as a full 64-bit register, whereas the other
  ; length was loaded as a zero-extended DWORD. If the r9 argument is a 32-bit
  ; value, its upper half is not cleared here - confirm against the caller's
  ; prototype.
  15726. shl r9, 3
  15727. shl rax, 3
  15728. vmovq xmm0, r9
  15729. vmovq xmm1, rax
  15730. vpunpcklqdq xmm0, xmm0, xmm1
  15731. vpxor xmm0, xmm0, xmm4
  15732. ; ghash_gfmul_red
  ; Carry-less multiply of xmm0 by xmm5 using four 64x64 partial products,
  ; followed by two folding steps against the mod2_128 constant. The reduced
  ; 128-bit product ends up in xmm0.
  15733. vpclmulqdq xmm7, xmm0, xmm5, 16    ; xmm0.lo * xmm5.hi
  15734. vpclmulqdq xmm3, xmm0, xmm5, 1    ; xmm0.hi * xmm5.lo
  15735. vpclmulqdq xmm2, xmm0, xmm5, 0    ; xmm0.lo * xmm5.lo
  15736. vpxor xmm7, xmm7, xmm3    ; xmm7 = sum of the two cross products
  15737. vpslldq xmm3, xmm7, 8    ; low half of the cross sum, moved to the upper qword
  15738. vpsrldq xmm7, xmm7, 8    ; high half of the cross sum, moved to the lower qword
  15739. vpxor xmm3, xmm3, xmm2    ; xmm3 = low 128 bits of the product
  15740. vpclmulqdq xmm0, xmm0, xmm5, 17    ; xmm0.hi * xmm5.hi
  15741. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16    ; first folding step
  15742. vpshufd xmm3, xmm3, 78    ; swap the two qwords
  15743. vpxor xmm3, xmm3, xmm2
  15744. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16    ; second folding step
  15745. vpshufd xmm3, xmm3, 78    ; swap the two qwords
  15746. vpxor xmm0, xmm0, xmm7    ; high product + high half of the cross sum
  15747. vpxor xmm0, xmm0, xmm3
  15748. vpxor xmm0, xmm0, xmm2    ; xmm0 = reduced product
  15749. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask    ; reorder bytes with the bswap mask
  15750. vpxor xmm0, xmm0, xmm6    ; xmm0 = computed tag
  15751. ; cmp_tag
  ; A tag length of exactly 16 takes the single vector compare; any other
  ; length takes the byte loop.
  15752. cmp r8d, 16
  15753. je L_AES_GCM_decrypt_final_avx2_cmp_tag_16
  15754. xor r13, r13    ; r13 = byte index
  15755. xor r10, r10    ; r10b accumulates the OR of all byte differences
  15756. vmovdqu OWORD PTR [rsp], xmm0    ; spill the computed tag so it can be read byte-wise
  ; The loop has no early exit: every one of the r8d bytes is compared, and
  ; the differences are OR-ed together.
  ; NOTE(review): the count is tested only after the body has run, so r8d == 0
  ; would not stop after zero iterations - presumably callers never pass 0;
  ; confirm.
  15757. L_AES_GCM_decrypt_final_avx2_cmp_tag_loop:
  15758. movzx r14d, BYTE PTR [rsp+r13]    ; computed tag byte
  15759. xor r14b, BYTE PTR [rdx+r13]    ; non-zero if it differs from the supplied byte
  15760. or r10b, r14b
  15761. inc r13d
  15762. cmp r13d, r8d
  15763. jne L_AES_GCM_decrypt_final_avx2_cmp_tag_loop
  15764. cmp r10, 0
  15765. sete r10b    ; r10 = 1 if no byte differed, else 0
  15766. jmp L_AES_GCM_decrypt_final_avx2_cmp_tag_done
  15767. L_AES_GCM_decrypt_final_avx2_cmp_tag_16:
  15768. vmovdqu xmm1, OWORD PTR [rdx]    ; supplied 16-byte tag
  15769. vpcmpeqb xmm0, xmm0, xmm1    ; 0xFF in every byte lane that matches
  15770. vpmovmskb r13, xmm0    ; one mask bit per byte lane
  15771. ; r13d == 0xFFFF (all 16 bytes equal) => result 1, otherwise result 0
  15772. xor r10d, r10d
  15773. cmp r13d, 65535
  15774. sete r10b
  15775. L_AES_GCM_decrypt_final_avx2_cmp_tag_done:
  15776. mov DWORD PTR [r12], r10d    ; store the 0/1 comparison result for the caller
  15777. vzeroupper
  15778. vmovdqu xmm6, OWORD PTR [rsp+16]    ; restore the saved xmm6/xmm7
  15779. vmovdqu xmm7, OWORD PTR [rsp+32]
  15780. add rsp, 48
  15781. pop r14
  15782. pop r13
  15783. pop r12
  15784. ret
  15785. AES_GCM_decrypt_final_avx2 ENDP
  15786. _text ENDS
  15787. ENDIF
  15788. END