aesp8-ppc.pl

#! /usr/bin/env perl
# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in the pure AltiVec/VMX way [data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#			CBC en-/decrypt	CTR	XTS
# POWER8[le]		3.96/0.72	0.74	1.1
# POWER8[be]		3.75/0.65	0.66	1.0
# POWER9[le]		4.02/0.86	0.84	1.05
# POWER9[be]		3.99/0.78	0.79	0.97
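#
# The $prefix setting below makes the module emit entry points named
# aes_p8_set_encrypt_key, aes_p8_set_decrypt_key, aes_p8_encrypt,
# aes_p8_decrypt, aes_p8_cbc_encrypt and aes_p8_ctr32_encrypt_blocks.
# As a sketch of the C-level view (assuming the usual OpenSSL AES_KEY
# conventions; the authoritative prototypes live in the C headers, not
# here):
#
#	int  aes_p8_set_encrypt_key(const unsigned char *userKey,
#				    const int bits, AES_KEY *key);
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#				size_t length, const AES_KEY *key,
#				unsigned char *ivec, const int enc);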
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
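# The script writes its assembly to STDOUT through the ppc-xlate.pl
# filter; a typical invocation (a sketch, the arguments are normally
# supplied by the build system) looks something like:
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s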
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{ # Key setup procedures #

my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine "any"

.text

.align 7
rcon:
.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
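# The "?rev"/"?asis" tags above and the "?"/"le?" instruction prefixes
# used throughout are ppc-xlate.pl conventions, not assembler syntax:
# loosely speaking, "?rev" byte-reverses the .long data on little-endian
# targets ("?asis" leaves it as is), "le?" emits the instruction only in
# little-endian builds, and a bare "?" marks an instruction that the
# translator may rewrite in an endian-dependent way.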
Lconsts:
	mflr r0
	bcl 20,31,\$+4
	mflr $ptr # distance between . and rcon
	addi $ptr,$ptr,-0x48
	mtlr r0
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,0,0
	.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl .${prefix}_set_encrypt_key
.align 5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr r11
	$PUSH r11,$LRSAVE($sp)

	li $ptr,-1
	${UCMP}i $inp,0
	beq- Lenc_key_abort # if ($inp==0) return -1;
	${UCMP}i $out,0
	beq- Lenc_key_abort # if ($out==0) return -1;
	li $ptr,-2
	cmpwi $bits,128
	blt- Lenc_key_abort
	cmpwi $bits,256
	bgt- Lenc_key_abort
	andi. r0,$bits,0x3f
	bne- Lenc_key_abort

	lis r0,0xfff0
	mfspr $vrsave,256
	mtspr 256,r0

	bl Lconsts
	mtlr r11

	neg r9,$inp
	lvx $in0,0,$inp
	addi $inp,$inp,15 # 15 is not typo
	lvsr $key,0,r9 # borrow $key
	li r8,0x20
	cmpwi $bits,192
	lvx $in1,0,$inp
	le?vspltisb $mask,0x0f # borrow $mask
	lvx $rcon,0,$ptr
	le?vxor $key,$key,$mask # adjust for byte swap
	lvx $mask,r8,$ptr
	addi $ptr,$ptr,0x10
	vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
	li $cnt,8
	vxor $zero,$zero,$zero
	mtctr $cnt

	?lvsr $outperm,0,$out
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$zero,$outmask,$outperm

	blt Loop128
	addi $inp,$inp,8
	beq L192
	addi $inp,$inp,8
	b L256

.align 4
Loop128:
	vperm $key,$in0,$in0,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key
	bdnz Loop128
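# A sketch of what each Loop128 iteration computes, in standard AES
# key-schedule terms (w[0..3] is the previous round key, w'[0..3] the
# next one):
#
#	t     = SubWord(RotWord(w[3])) ^ rcon
#	w'[0] = w[0] ^ t
#	w'[i] = w'[i-1] ^ w[i],  i = 1..3
#
# The vperm "rotate-n-splat" plus vcipherlast against $rcon produce t
# in every lane (ShiftRows is a no-op on a splatted state), and the
# three vsldoi/vxor pairs implement the cascading XORs.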
	lvx $rcon,0,$ptr # last two round keys

	vperm $key,$in0,$in0,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key

	vperm $key,$in0,$in0,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vxor $in0,$in0,$key
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out

	addi $inp,$out,15 # 15 is not typo
	addi $out,$out,0x50

	li $rounds,10
	b Ldone

.align 4
L192:
	lvx $tmp,0,$inp
	li $cnt,4
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $out,$out,16
	vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
	vspltisb $key,8 # borrow $key
	mtctr $cnt
	vsububm $mask,$mask,$key # adjust the mask

Loop192:
	vperm $key,$in1,$in1,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vcipherlast $key,$key,$rcon

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp

	vsldoi $stage,$zero,$in1,8
	vspltw $tmp,$in0,3
	vxor $tmp,$tmp,$in1
	vsldoi $in1,$zero,$in1,12 # >>32
	vadduwm $rcon,$rcon,$rcon
	vxor $in1,$in1,$tmp
	vxor $in0,$in0,$key
	vxor $in1,$in1,$key
	vsldoi $stage,$stage,$in0,8

	vperm $key,$in1,$in1,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$stage,$stage,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vsldoi $stage,$in0,$in1,8
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vperm $outtail,$stage,$stage,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	stvx $stage,0,$out
	addi $out,$out,16

	vspltw $tmp,$in0,3
	vxor $tmp,$tmp,$in1
	vsldoi $in1,$zero,$in1,12 # >>32
	vadduwm $rcon,$rcon,$rcon
	vxor $in1,$in1,$tmp
	vxor $in0,$in0,$key
	vxor $in1,$in1,$key
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $inp,$out,15 # 15 is not typo
	addi $out,$out,16
	bdnz Loop192

	li $rounds,12
	addi $out,$out,0x20
	b Ldone

.align 4
L256:
	lvx $tmp,0,$inp
	li $cnt,7
	li $rounds,14
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $out,$out,16
	vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
	mtctr $cnt

Loop256:
	vperm $key,$in1,$in1,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in1,$in1,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $inp,$out,15 # 15 is not typo
	addi $out,$out,16
	bdz Ldone

	vspltw $key,$in0,3 # just splat
	vsldoi $tmp,$zero,$in1,12 # >>32
	vsbox $key,$key

	vxor $in1,$in1,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in1,$in1,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in1,$in1,$tmp

	vxor $in1,$in1,$key
	b Loop256

.align 4
Ldone:
	lvx $in1,0,$inp # redundant in aligned case
	vsel $in1,$outhead,$in1,$outmask
	stvx $in1,0,$inp
	li $ptr,0
	mtspr 256,$vrsave
	stw $rounds,0($out)

Lenc_key_abort:
	mr r3,$ptr
	blr
	.long 0
	.byte 0,12,0x14,1,0,0,3,0
	.long 0
.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl .${prefix}_set_decrypt_key
.align 5
.${prefix}_set_decrypt_key:
	$STU $sp,-$FRAME($sp)
	mflr r10
	$PUSH r10,$FRAME+$LRSAVE($sp)
	bl Lset_encrypt_key
	mtlr r10

	cmpwi r3,0
	bne- Ldec_key_abort

	slwi $cnt,$rounds,4
	subi $inp,$out,240 # first round key
	srwi $rounds,$rounds,1
	add $out,$inp,$cnt # last round key
	mtctr $rounds
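# The loop below reverses the schedule in place: the decryption key
# schedule is simply the encryption schedule with the round keys swapped
# end-for-end, 16 bytes at a time. $inp walks forward from the first
# round key while $out walks back from the last, and the lwz/stw
# quartets exchange the two.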
Ldeckey:
	lwz r0, 0($inp)
	lwz r6, 4($inp)
	lwz r7, 8($inp)
	lwz r8, 12($inp)
	addi $inp,$inp,16
	lwz r9, 0($out)
	lwz r10,4($out)
	lwz r11,8($out)
	lwz r12,12($out)
	stw r0, 0($out)
	stw r6, 4($out)
	stw r7, 8($out)
	stw r8, 12($out)
	subi $out,$out,16
	stw r9, -16($inp)
	stw r10,-12($inp)
	stw r11,-8($inp)
	stw r12,-4($inp)
	bdnz Ldeckey

	xor r3,r3,r3 # return value
Ldec_key_abort:
	addi $sp,$sp,$FRAME
	blr
	.long 0
	.byte 0,12,4,1,0x80,0,3,0
	.long 0
.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{ # Single block en- and decrypt procedures #

sub gen_block () {
my $dir = shift;
my $n = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl .${prefix}_${dir}crypt
.align 5
.${prefix}_${dir}crypt:
	lwz $rounds,240($key)
	lis r0,0xfc00
	mfspr $vrsave,256
	li $idx,15 # 15 is not typo
	mtspr 256,r0

	lvx v0,0,$inp
	neg r11,$out
	lvx v1,$idx,$inp
	lvsl v2,0,$inp # inpperm
	le?vspltisb v4,0x0f
	?lvsl v3,0,r11 # outperm
	le?vxor v2,v2,v4
	li $idx,16
	vperm v0,v0,v1,v2 # align [and byte swap in LE]
	lvx v1,0,$key
	?lvsl v5,0,$key # keyperm
	srwi $rounds,$rounds,1
	lvx v2,$idx,$key
	addi $idx,$idx,16
	subi $rounds,$rounds,1
	?vperm v1,v1,v2,v5 # align round key

	vxor v0,v0,v1
	lvx v1,$idx,$key
	addi $idx,$idx,16
	mtctr $rounds

Loop_${dir}c:
	?vperm v2,v2,v1,v5
	v${n}cipher v0,v0,v2
	lvx v2,$idx,$key
	addi $idx,$idx,16
	?vperm v1,v1,v2,v5
	v${n}cipher v0,v0,v1
	lvx v1,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_${dir}c

	?vperm v2,v2,v1,v5
	v${n}cipher v0,v0,v2
	lvx v2,$idx,$key
	?vperm v1,v1,v2,v5
	v${n}cipherlast v0,v0,v1

	vspltisb v2,-1
	vxor v1,v1,v1
	li $idx,15 # 15 is not typo
	?vperm v2,v1,v2,v3 # outmask
	le?vxor v3,v3,v4
	lvx v1,0,$out # outhead
	vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
	vsel v1,v1,v0,v2
	lvx v4,$idx,$out
	stvx v1,0,$out
	vsel v0,v0,v4,v2
	stvx v0,$idx,$out

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,3,0
	.long 0
.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
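# gen_block stamps out two near-identical single-block routines: with
# $n="" it emits vcipher/vcipherlast and becomes .aes_p8_encrypt, with
# $n="n" it emits vncipher/vncipherlast and becomes .aes_p8_decrypt.
# Either routine processes one 16-byte block, two rounds per loop
# iteration (hence the srwi $rounds,$rounds,1 in the prologue).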
}}}
#########################################################################
{{{ # CBC en- and decrypt procedures #

my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));

$code.=<<___;
.globl .${prefix}_cbc_encrypt
.align 5
.${prefix}_cbc_encrypt:
	${UCMP}i $len,16
	bltlr-

	cmpwi $enc,0 # test direction
	lis r0,0xffe0
	mfspr $vrsave,256
	mtspr 256,r0

	li $idx,15
	vxor $rndkey0,$rndkey0,$rndkey0
	le?vspltisb $tmp,0x0f

	lvx $ivec,0,$ivp # load [unaligned] iv
	lvsl $inpperm,0,$ivp
	lvx $inptail,$idx,$ivp
	le?vxor $inpperm,$inpperm,$tmp
	vperm $ivec,$ivec,$inptail,$inpperm

	neg r11,$inp
	?lvsl $keyperm,0,$key # prepare for unaligned key
	lwz $rounds,240($key)

	lvsr $inpperm,0,r11 # prepare for unaligned load
	lvx $inptail,0,$inp
	addi $inp,$inp,15 # 15 is not typo
	le?vxor $inpperm,$inpperm,$tmp

	?lvsr $outperm,0,$out # prepare for unaligned store
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp

	srwi $rounds,$rounds,1
	li $idx,16
	subi $rounds,$rounds,1
	beq Lcbc_dec

Lcbc_enc:
	vmr $inout,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	mtctr $rounds
	subi $len,$len,16 # len-=16

	lvx $rndkey0,0,$key
	vperm $inout,$inout,$inptail,$inpperm
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	vxor $inout,$inout,$ivec

Loop_cbc_enc:
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_cbc_enc

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	li $idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast $ivec,$inout,$rndkey0
	${UCMP}i $len,16

	vperm $tmp,$ivec,$ivec,$outperm
	vsel $inout,$outhead,$tmp,$outmask
	vmr $outhead,$tmp
	stvx $inout,0,$out
	addi $out,$out,16
	bge Lcbc_enc

	b Lcbc_done
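# CBC encryption is inherently serial (each block's input depends on
# the previous ciphertext block), so Lcbc_enc above handles one block
# at a time; that is why the table at the top of the file shows roughly
# 4 cycles per byte for CBC encrypt versus well under 1 for the 8x
# parallel decrypt path below.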
.align 4
Lcbc_dec:
	${UCMP}i $len,128
	bge _aesp8_cbc_decrypt8x
	vmr $tmp,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	mtctr $rounds
	subi $len,$len,16 # len-=16

	lvx $rndkey0,0,$key
	vperm $tmp,$tmp,$inptail,$inpperm
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$tmp,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16

Loop_cbc_dec:
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_cbc_dec

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	li $idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast $inout,$inout,$rndkey0
	${UCMP}i $len,16

	vxor $inout,$inout,$ivec
	vmr $ivec,$tmp
	vperm $tmp,$inout,$inout,$outperm
	vsel $inout,$outhead,$tmp,$outmask
	vmr $outhead,$tmp
	stvx $inout,0,$out
	addi $out,$out,16
	bge Lcbc_dec

Lcbc_done:
	addi $out,$out,-1
	lvx $inout,0,$out # redundant in aligned case
	vsel $inout,$outhead,$inout,$outmask
	stvx $inout,0,$out

	neg $enc,$ivp # write [unaligned] iv
	li $idx,15 # 15 is not typo
	vxor $rndkey0,$rndkey0,$rndkey0
	vspltisb $outmask,-1
	le?vspltisb $tmp,0x0f
	?lvsl $outperm,0,$enc
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp
	lvx $outhead,0,$ivp
	vperm $ivec,$ivec,$ivec,$outperm
	vsel $inout,$outhead,$ivec,$outmask
	lvx $inptail,$idx,$ivp
	stvx $inout,0,$ivp
	vsel $inout,$ivec,$inptail,$outmask
	stvx $inout,$idx,$ivp

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,6,0
	.long 0
___
#########################################################################
{{ # Optimized CBC decrypt procedure #

my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

$code.=<<___;
.align 5
_aesp8_cbc_decrypt8x:
	$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li r10,`$FRAME+8*16+15`
	li r11,`$FRAME+8*16+31`
	stvx v20,r10,$sp # ABI says so
	addi r10,r10,32
	stvx v21,r11,$sp
	addi r11,r11,32
	stvx v22,r10,$sp
	addi r10,r10,32
	stvx v23,r11,$sp
	addi r11,r11,32
	stvx v24,r10,$sp
	addi r10,r10,32
	stvx v25,r11,$sp
	addi r11,r11,32
	stvx v26,r10,$sp
	addi r10,r10,32
	stvx v27,r11,$sp
	addi r11,r11,32
	stvx v28,r10,$sp
	addi r10,r10,32
	stvx v29,r11,$sp
	addi r11,r11,32
	stvx v30,r10,$sp
	stvx v31,r11,$sp
	li r0,-1
	stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
	li $x10,0x10
	$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li $x20,0x20
	$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li $x30,0x30
	$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li $x40,0x40
	$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li $x50,0x50
	$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li $x60,0x60
	$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li $x70,0x70
	mtspr 256,r0

	subi $rounds,$rounds,3 # -4 in total
	subi $len,$len,128 # bias

	lvx $rndkey0,$x00,$key # load key schedule
	lvx v30,$x10,$key
	addi $key,$key,0x20
	lvx v31,$x00,$key
	?vperm $rndkey0,$rndkey0,v30,$keyperm
	addi $key_,$sp,$FRAME+15
	mtctr $rounds

Load_cbc_dec_key:
	?vperm v24,v30,v31,$keyperm
	lvx v30,$x10,$key
	addi $key,$key,0x20
	stvx v24,$x00,$key_ # off-load round[1]
	?vperm v25,v31,v30,$keyperm
	lvx v31,$x00,$key
	stvx v25,$x10,$key_ # off-load round[2]
	addi $key_,$key_,0x20
	bdnz Load_cbc_dec_key

	lvx v26,$x10,$key
	?vperm v24,v30,v31,$keyperm
	lvx v27,$x20,$key
	stvx v24,$x00,$key_ # off-load round[3]
	?vperm v25,v31,v26,$keyperm
	lvx v28,$x30,$key
	stvx v25,$x10,$key_ # off-load round[4]
	addi $key_,$sp,$FRAME+15 # rewind $key_
	?vperm v26,v26,v27,$keyperm
	lvx v29,$x40,$key
	?vperm v27,v27,v28,$keyperm
	lvx v30,$x50,$key
	?vperm v28,v28,v29,$keyperm
	lvx v31,$x60,$key
	?vperm v29,v29,v30,$keyperm
	lvx $out0,$x70,$key # borrow $out0
	?vperm v30,v30,v31,$keyperm
	lvx v24,$x00,$key_ # pre-load round[1]
	?vperm v31,v31,$out0,$keyperm
	lvx v25,$x10,$key_ # pre-load round[2]

	#lvx $inptail,0,$inp # "caller" already did this
	#addi $inp,$inp,15 # 15 is not typo
	subi $inp,$inp,15 # undo "caller"
	le?li $idx,8
	lvx_u $in0,$x00,$inp # load first 8 "words"
	le?lvsl $inpperm,0,$idx
	le?vspltisb $tmp,0x0f
	lvx_u $in1,$x10,$inp
	le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
	lvx_u $in2,$x20,$inp
	le?vperm $in0,$in0,$in0,$inpperm
	lvx_u $in3,$x30,$inp
	le?vperm $in1,$in1,$in1,$inpperm
	lvx_u $in4,$x40,$inp
	le?vperm $in2,$in2,$in2,$inpperm
	vxor $out0,$in0,$rndkey0
	lvx_u $in5,$x50,$inp
	le?vperm $in3,$in3,$in3,$inpperm
	vxor $out1,$in1,$rndkey0
	lvx_u $in6,$x60,$inp
	le?vperm $in4,$in4,$in4,$inpperm
	vxor $out2,$in2,$rndkey0
	lvx_u $in7,$x70,$inp
	addi $inp,$inp,0x80
	le?vperm $in5,$in5,$in5,$inpperm
	vxor $out3,$in3,$rndkey0
	le?vperm $in6,$in6,$in6,$inpperm
	vxor $out4,$in4,$rndkey0
	le?vperm $in7,$in7,$in7,$inpperm
	vxor $out5,$in5,$rndkey0
	vxor $out6,$in6,$rndkey0
	vxor $out7,$in7,$rndkey0

	mtctr $rounds
	b Loop_cbc_dec8x
.align 5
Loop_cbc_dec8x:
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24
	lvx v24,$x20,$key_ # round[3]
	addi $key_,$key_,0x20

	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25
	lvx v25,$x10,$key_ # round[4]
	bdnz Loop_cbc_dec8x

	subic $len,$len,128 # $len-=128
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24

	subfe. r0,r0,r0 # borrow?-1:0
	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25

	and r0,r0,$len
	vncipher $out0,$out0,v26
	vncipher $out1,$out1,v26
	vncipher $out2,$out2,v26
	vncipher $out3,$out3,v26
	vncipher $out4,$out4,v26
	vncipher $out5,$out5,v26
	vncipher $out6,$out6,v26
	vncipher $out7,$out7,v26

	add $inp,$inp,r0	# $inp is adjusted in such
				# way that at exit from the
				# loop inX-in7 are loaded
				# with last "words"
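# (Reading note: the subic/subfe./and/add sequence above is a branchless
# pointer adjustment. subic sets CA only when $len >= 128; subfe. then
# yields r0 = 0 on no-borrow or r0 = -1 on borrow, so "and r0,r0,$len"
# is either 0 or the now-negative remaining length, and adding it to
# $inp rewinds the input pointer so the eight lvx_u loads below pick up
# the last blocks.)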
	vncipher $out0,$out0,v27
	vncipher $out1,$out1,v27
	vncipher $out2,$out2,v27
	vncipher $out3,$out3,v27
	vncipher $out4,$out4,v27
	vncipher $out5,$out5,v27
	vncipher $out6,$out6,v27
	vncipher $out7,$out7,v27

	addi $key_,$sp,$FRAME+15 # rewind $key_
	vncipher $out0,$out0,v28
	vncipher $out1,$out1,v28
	vncipher $out2,$out2,v28
	vncipher $out3,$out3,v28
	vncipher $out4,$out4,v28
	vncipher $out5,$out5,v28
	vncipher $out6,$out6,v28
	vncipher $out7,$out7,v28
	lvx v24,$x00,$key_ # re-pre-load round[1]

	vncipher $out0,$out0,v29
	vncipher $out1,$out1,v29
	vncipher $out2,$out2,v29
	vncipher $out3,$out3,v29
	vncipher $out4,$out4,v29
	vncipher $out5,$out5,v29
	vncipher $out6,$out6,v29
	vncipher $out7,$out7,v29
	lvx v25,$x10,$key_ # re-pre-load round[2]

	vncipher $out0,$out0,v30
	vxor $ivec,$ivec,v31 # xor with last round key
	vncipher $out1,$out1,v30
	vxor $in0,$in0,v31
	vncipher $out2,$out2,v30
	vxor $in1,$in1,v31
	vncipher $out3,$out3,v30
	vxor $in2,$in2,v31
	vncipher $out4,$out4,v30
	vxor $in3,$in3,v31
	vncipher $out5,$out5,v30
	vxor $in4,$in4,v31
	vncipher $out6,$out6,v30
	vxor $in5,$in5,v31
	vncipher $out7,$out7,v30
	vxor $in6,$in6,v31

	vncipherlast $out0,$out0,$ivec
	vncipherlast $out1,$out1,$in0
	lvx_u $in0,$x00,$inp # load next input block
	vncipherlast $out2,$out2,$in1
	lvx_u $in1,$x10,$inp
	vncipherlast $out3,$out3,$in2
	le?vperm $in0,$in0,$in0,$inpperm
	lvx_u $in2,$x20,$inp
	vncipherlast $out4,$out4,$in3
	le?vperm $in1,$in1,$in1,$inpperm
	lvx_u $in3,$x30,$inp
	vncipherlast $out5,$out5,$in4
	le?vperm $in2,$in2,$in2,$inpperm
	lvx_u $in4,$x40,$inp
	vncipherlast $out6,$out6,$in5
	le?vperm $in3,$in3,$in3,$inpperm
	lvx_u $in5,$x50,$inp
	vncipherlast $out7,$out7,$in6
	le?vperm $in4,$in4,$in4,$inpperm
	lvx_u $in6,$x60,$inp
	vmr $ivec,$in7
	le?vperm $in5,$in5,$in5,$inpperm
	lvx_u $in7,$x70,$inp
	addi $inp,$inp,0x80

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $in6,$in6,$in6,$inpperm
	vxor $out0,$in0,$rndkey0
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $in7,$in7,$in7,$inpperm
	vxor $out1,$in1,$rndkey0
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	vxor $out2,$in2,$rndkey0
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x30,$out
	vxor $out3,$in3,$rndkey0
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x40,$out
	vxor $out4,$in4,$rndkey0
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x50,$out
	vxor $out5,$in5,$rndkey0
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x60,$out
	vxor $out6,$in6,$rndkey0
	stvx_u $out7,$x70,$out
	addi $out,$out,0x80
	vxor $out7,$in7,$rndkey0

	mtctr $rounds
	beq Loop_cbc_dec8x # did $len-=128 borrow?

	addic. $len,$len,128
	beq Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail: # up to 7 "words" tail...
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24
	lvx v24,$x20,$key_ # round[3]
	addi $key_,$key_,0x20

	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25
	lvx v25,$x10,$key_ # round[4]
	bdnz Loop_cbc_dec8x_tail

	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24

	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25

	vncipher $out1,$out1,v26
	vncipher $out2,$out2,v26
	vncipher $out3,$out3,v26
	vncipher $out4,$out4,v26
	vncipher $out5,$out5,v26
	vncipher $out6,$out6,v26
	vncipher $out7,$out7,v26

	vncipher $out1,$out1,v27
	vncipher $out2,$out2,v27
	vncipher $out3,$out3,v27
	vncipher $out4,$out4,v27
	vncipher $out5,$out5,v27
	vncipher $out6,$out6,v27
	vncipher $out7,$out7,v27

	vncipher $out1,$out1,v28
	vncipher $out2,$out2,v28
	vncipher $out3,$out3,v28
	vncipher $out4,$out4,v28
	vncipher $out5,$out5,v28
	vncipher $out6,$out6,v28
	vncipher $out7,$out7,v28

	vncipher $out1,$out1,v29
	vncipher $out2,$out2,v29
	vncipher $out3,$out3,v29
	vncipher $out4,$out4,v29
	vncipher $out5,$out5,v29
	vncipher $out6,$out6,v29
	vncipher $out7,$out7,v29

	vncipher $out1,$out1,v30
	vxor $ivec,$ivec,v31 # last round key
	vncipher $out2,$out2,v30
	vxor $in1,$in1,v31
	vncipher $out3,$out3,v30
	vxor $in2,$in2,v31
	vncipher $out4,$out4,v30
	vxor $in3,$in3,v31
	vncipher $out5,$out5,v30
	vxor $in4,$in4,v31
	vncipher $out6,$out6,v30
	vxor $in5,$in5,v31
	vncipher $out7,$out7,v30
	vxor $in6,$in6,v31

	cmplwi $len,32 # switch($len)
	blt Lcbc_dec8x_one
	nop
	beq Lcbc_dec8x_two
	cmplwi $len,64
	blt Lcbc_dec8x_three
	nop
	beq Lcbc_dec8x_four
	cmplwi $len,96
	blt Lcbc_dec8x_five
	nop
	beq Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast $out1,$out1,$ivec
	vncipherlast $out2,$out2,$in1
	vncipherlast $out3,$out3,$in2
	vncipherlast $out4,$out4,$in3
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out1,$out1,$out1,$inpperm
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x00,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x10,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x20,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x30,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x40,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x50,$out
	stvx_u $out7,$x60,$out
	addi $out,$out,0x70
	b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_six:
	vncipherlast $out2,$out2,$ivec
	vncipherlast $out3,$out3,$in2
	vncipherlast $out4,$out4,$in3
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out2,$out2,$out2,$inpperm
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x00,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x10,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x20,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x30,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x40,$out
	stvx_u $out7,$x50,$out
	addi $out,$out,0x60
	b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_five:
	vncipherlast $out3,$out3,$ivec
	vncipherlast $out4,$out4,$in3
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out3,$out3,$out3,$inpperm
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x00,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x10,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x20,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x30,$out
	stvx_u $out7,$x40,$out
	addi $out,$out,0x50
	b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_four:
	vncipherlast $out4,$out4,$ivec
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out4,$out4,$out4,$inpperm
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x00,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x10,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x20,$out
	stvx_u $out7,$x30,$out
	addi $out,$out,0x40
	b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_three:
	vncipherlast $out5,$out5,$ivec
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out5,$out5,$out5,$inpperm
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x00,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x10,$out
	stvx_u $out7,$x20,$out
	addi $out,$out,0x30
	b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_two:
	vncipherlast $out6,$out6,$ivec
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out6,$out6,$out6,$inpperm
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x00,$out
	stvx_u $out7,$x10,$out
	addi $out,$out,0x20
	b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_one:
	vncipherlast $out7,$out7,$ivec
	vmr $ivec,$in7

	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out7,0,$out
	addi $out,$out,0x10

Lcbc_dec8x_done:
	le?vperm $ivec,$ivec,$ivec,$inpperm
	stvx_u $ivec,0,$ivp # write [unaligned] iv

	li r10,`$FRAME+15`
	li r11,`$FRAME+31`
	stvx $inpperm,r10,$sp # wipe copies of round keys
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32

	mtspr 256,$vrsave
	lvx v20,r10,$sp # ABI says so
	addi r10,r10,32
	lvx v21,r11,$sp
	addi r11,r11,32
	lvx v22,r10,$sp
	addi r10,r10,32
	lvx v23,r11,$sp
	addi r11,r11,32
	lvx v24,r10,$sp
	addi r10,r10,32
	lvx v25,r11,$sp
	addi r11,r11,32
	lvx v26,r10,$sp
	addi r10,r10,32
	lvx v27,r11,$sp
	addi r11,r11,32
	lvx v28,r10,$sp
	addi r10,r10,32
	lvx v29,r11,$sp
	addi r11,r11,32
	lvx v30,r10,$sp
	lvx v31,r11,$sp
	$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long 0
	.byte 0,12,0x04,0,0x80,6,6,0
	.long 0
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}} }}}
#########################################################################
{{{ # CTR procedure[s] #

my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl .${prefix}_ctr32_encrypt_blocks
.align 5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i $len,1
	bltlr-

	lis r0,0xfff0
	mfspr $vrsave,256
	mtspr 256,r0

	li $idx,15
	vxor $rndkey0,$rndkey0,$rndkey0
	le?vspltisb $tmp,0x0f

	lvx $ivec,0,$ivp # load [unaligned] iv
	lvsl $inpperm,0,$ivp
	lvx $inptail,$idx,$ivp
	vspltisb $one,1
	le?vxor $inpperm,$inpperm,$tmp
	vperm $ivec,$ivec,$inptail,$inpperm
	vsldoi $one,$rndkey0,$one,1
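# $one now holds {0,0,0,1}: vspltisb filled every byte with 1 and the
# vsldoi shifted in zero bytes from $rndkey0 so that only the last byte
# survives; vadduwm with it below therefore bumps just the low 32-bit
# counter word.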
	neg r11,$inp
	?lvsl $keyperm,0,$key # prepare for unaligned key
	lwz $rounds,240($key)

	lvsr $inpperm,0,r11 # prepare for unaligned load
	lvx $inptail,0,$inp
	addi $inp,$inp,15 # 15 is not typo
	le?vxor $inpperm,$inpperm,$tmp

	srwi $rounds,$rounds,1
	li $idx,16
	subi $rounds,$rounds,1

	${UCMP}i $len,8
	bge _aesp8_ctr32_encrypt8x

	?lvsr $outperm,0,$out # prepare for unaligned store
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp

	lvx $rndkey0,0,$key
	mtctr $rounds
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$ivec,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	b Loop_ctr32_enc

.align 5
Loop_ctr32_enc:
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_ctr32_enc

	vadduwm $ivec,$ivec,$one
	vmr $dat,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	subic. $len,$len,1 # blocks--

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	vperm $dat,$dat,$inptail,$inpperm
	li $idx,16
	?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx $rndkey0,0,$key
	vxor $dat,$dat,$rndkey1 # last round key
	vcipherlast $inout,$inout,$dat

	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	vperm $inout,$inout,$inout,$outperm
	vsel $dat,$outhead,$inout,$outmask
	mtctr $rounds
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr $outhead,$inout
	vxor $inout,$ivec,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	stvx $dat,0,$out
	addi $out,$out,16
	bne Loop_ctr32_enc

	addi $out,$out,-1
	lvx $inout,0,$out # redundant in aligned case
	vsel $inout,$outhead,$inout,$outmask
	stvx $inout,0,$out

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,6,0
	.long 0
___
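# Note that the counter arithmetic above is deliberately 32-bit:
# vadduwm wraps each word independently, so only the rightmost word of
# the IV is incremented, matching the _ctr32_ contract relied upon by
# GCM-style callers.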
#########################################################################
{{ # Optimized CTR procedure #

my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align 5
_aesp8_ctr32_encrypt8x:
	$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li r10,`$FRAME+8*16+15`
	li r11,`$FRAME+8*16+31`
	stvx v20,r10,$sp # ABI says so
	addi r10,r10,32
	stvx v21,r11,$sp
	addi r11,r11,32
	stvx v22,r10,$sp
	addi r10,r10,32
	stvx v23,r11,$sp
	addi r11,r11,32
	stvx v24,r10,$sp
	addi r10,r10,32
	stvx v25,r11,$sp
	addi r11,r11,32
	stvx v26,r10,$sp
	addi r10,r10,32
	stvx v27,r11,$sp
	addi r11,r11,32
	stvx v28,r10,$sp
	addi r10,r10,32
	stvx v29,r11,$sp
	addi r11,r11,32
	stvx v30,r10,$sp
	stvx v31,r11,$sp
	li r0,-1
	stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
	li $x10,0x10
	$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li $x20,0x20
	$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li $x30,0x30
	$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li $x40,0x40
	$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li $x50,0x50
	$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li $x60,0x60
	$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li $x70,0x70
	mtspr 256,r0

	subi $rounds,$rounds,3 # -4 in total

	lvx $rndkey0,$x00,$key # load key schedule
	lvx v30,$x10,$key
	addi $key,$key,0x20
	lvx v31,$x00,$key
	?vperm $rndkey0,$rndkey0,v30,$keyperm
	addi $key_,$sp,$FRAME+15
	mtctr $rounds

Load_ctr32_enc_key:
	?vperm v24,v30,v31,$keyperm
	lvx v30,$x10,$key
	addi $key,$key,0x20
	stvx v24,$x00,$key_ # off-load round[1]
	?vperm v25,v31,v30,$keyperm
	lvx v31,$x00,$key
	stvx v25,$x10,$key_ # off-load round[2]
	addi $key_,$key_,0x20
	bdnz Load_ctr32_enc_key

	lvx v26,$x10,$key
	?vperm v24,v30,v31,$keyperm
	lvx v27,$x20,$key
	stvx v24,$x00,$key_ # off-load round[3]
	?vperm v25,v31,v26,$keyperm
	lvx v28,$x30,$key
	stvx v25,$x10,$key_ # off-load round[4]
	addi $key_,$sp,$FRAME+15 # rewind $key_
	?vperm v26,v26,v27,$keyperm
	lvx v29,$x40,$key
	?vperm v27,v27,v28,$keyperm
	lvx v30,$x50,$key
	?vperm v28,v28,v29,$keyperm
	lvx v31,$x60,$key
	?vperm v29,v29,v30,$keyperm
	lvx $out0,$x70,$key # borrow $out0
	?vperm v30,v30,v31,$keyperm
	lvx v24,$x00,$key_ # pre-load round[1]
	?vperm v31,v31,$out0,$keyperm
	lvx v25,$x10,$key_ # pre-load round[2]

	vadduwm $two,$one,$one
	subi $inp,$inp,15 # undo "caller"
	$SHL $len,$len,4

	vadduwm $out1,$ivec,$one # counter values ...
	vadduwm $out2,$ivec,$two
	vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
	le?li $idx,8
	vadduwm $out3,$out1,$two
	vxor $out1,$out1,$rndkey0
	le?lvsl $inpperm,0,$idx
	vadduwm $out4,$out2,$two
	vxor $out2,$out2,$rndkey0
	le?vspltisb $tmp,0x0f
	vadduwm $out5,$out3,$two
	vxor $out3,$out3,$rndkey0
	le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
	vadduwm $out6,$out4,$two
	vxor $out4,$out4,$rndkey0
	vadduwm $out7,$out5,$two
	vxor $out5,$out5,$rndkey0
	vadduwm $ivec,$out6,$two # next counter value
	vxor $out6,$out6,$rndkey0
	vxor $out7,$out7,$rndkey0
  1350. mtctr $rounds
  1351. b Loop_ctr32_enc8x
  1352. .align 5
  1353. Loop_ctr32_enc8x:
  1354. vcipher $out0,$out0,v24
  1355. vcipher $out1,$out1,v24
  1356. vcipher $out2,$out2,v24
  1357. vcipher $out3,$out3,v24
  1358. vcipher $out4,$out4,v24
  1359. vcipher $out5,$out5,v24
  1360. vcipher $out6,$out6,v24
  1361. vcipher $out7,$out7,v24
  1362. Loop_ctr32_enc8x_middle:
  1363. lvx v24,$x20,$key_ # round[3]
  1364. addi $key_,$key_,0x20
  1365. vcipher $out0,$out0,v25
  1366. vcipher $out1,$out1,v25
  1367. vcipher $out2,$out2,v25
  1368. vcipher $out3,$out3,v25
  1369. vcipher $out4,$out4,v25
  1370. vcipher $out5,$out5,v25
  1371. vcipher $out6,$out6,v25
  1372. vcipher $out7,$out7,v25
  1373. lvx v25,$x10,$key_ # round[4]
  1374. bdnz Loop_ctr32_enc8x
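	# Branchless tail adjustment: subic sets CA, subfe r0,r0,r0 turns it
	# into 0 or -1, so after the "and" r0 = ($len < 256) ? $len-256 : 0.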
	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
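	# CR0 now records whether $len-129 borrowed, i.e. whether at most
	# 128 bytes remain; the bne below leaves the loop on that flag.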
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduwm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
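	# Here $len holds (bytes remaining - 128), a value in -0x70..0x00;
	# each 0x10 step up corresponds to one more leftover block.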
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10
Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#			const AES_KEY *key1, const AES_KEY *key2,	#
#			[const] unsigned char iv[16]);			#
# If $key2 is NULL, a "tweak chaining" mode is engaged: the input	#
# tweak value is assumed to be encrypted already, and the last tweak	#
# value, suitable for a consecutive call on the same chunk of data,	#
# is written back to the original buffer. In addition, in "tweak	#
# chaining" mode only complete input blocks are processed.		#
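# A typical call from C might look like the sketch below (key schedules
# prepared with this module's set_encrypt_key entry point; variable
# names here are illustrative only):
#
#	aes_p8_xts_encrypt(inp, out, len, &key1, &key2, iv);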
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;

($inp,$idx) = ($idx,$inp);	# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
.align	5
.${prefix}_xts_encrypt:
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc

Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_encrypt6x
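	# Short path, fewer than six blocks. Branchless: when under 32 bytes
	# remain, pull $inp back so the final 16-byte load covers the bytes
	# the ciphertext-stealing tail will need.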
	andi.	$taillen,$len,15
	subic	r0,$len,32
	subi	$taillen,$taillen,16
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	b	Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_enc_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	subic	r0,$len,32
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0
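	# The five instructions below multiply $tweak by x in GF(2^128):
	# vaddubm doubles every byte, vsrab captures each byte's carry-out,
	# and vsldoi/vand/vxor feed those carries into the neighbouring
	# byte, folding the top bit back in via the 0x87 reduction constant.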
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$output,$output,$rndkey0	# just in case $len<16
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_enc

	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
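	# Ciphertext stealing: byte-copy the last full ciphertext block
	# forward so its leading bytes become the $taillen-byte final block.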
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal

	mtctr	$rounds
	b	Loop_xts_enc		# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc_ret

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
.align	5
.${prefix}_xts_decrypt:
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff8
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0
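	# Decrypt-side stealing consumes one extra complete block: if $len
	# is not a multiple of 16, the last full block is held back for the
	# tail (the subtraction above does exactly that).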
	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_dec_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_dec

Lxts_dec_no_key2:
	neg	$idx,$len
	andi.	$idx,$idx,15
	add	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_dec:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_dec_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec

Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp

	subi	$inp,$inp,16
	add	$inp,$inp,$len

	vxor	$inout,$inout,$tweak	# :-(
	vxor	$inout,$inout,$tweak1	# :-)
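	# The last complete block must be deciphered with the *next* tweak
	# ($tweak1); the two vxors above swap $tweak out for $tweak1.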
Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal

	mtctr	$rounds
	b	Loop_xts_dec		# one more time...

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec_ret

	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec_ret:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;
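# Only six blocks per iteration here (vs. eight for CTR): XTS keeps a
# live tweak vector per block ($twk0-$twk5), so a six-way interleave is
# as wide as the vector register budget allows.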
$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_enc_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_enc6x
.align	5
Loop_xts_enc6x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc6x

	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v27
	vcipher	$out1,$out1,v27
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vxor	$tweak,$tweak,$tmp
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v28
	vcipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vcipher	$out0,$out0,v29
	vcipher	$out1,$out1,v29
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vcipher	$out4,$out4,v29
	vcipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15

	vcipher	$out0,$out0,v30
	vcipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v30
	vcipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	vxor	$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5	# last block might be needed
					# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp,$x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_enc6x		# did $len-=96 borrow?
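	# Out of the main loop: undo the 0x60 bias on $len and dispatch on
	# how many whole blocks (one to five) are left over.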
	addic.	$len,$len,0x60
	beq	Lxts_enc6x_zero
	cmpwi	$len,0x20
	blt	Lxts_enc6x_one
	nop
	beq	Lxts_enc6x_two
	cmpwi	$len,0x40
	blt	Lxts_enc6x_three
	nop
	beq	Lxts_enc6x_four

Lxts_enc6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vxor	$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc1x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25

	lvsr	$inpperm,0,$taillen
	vcipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vcipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vcipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1		# unused tweak
	vxor	$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_enc6x_done

	add	$inp,$inp,$taillen
	subi	$inp,$inp,16
	lvx_u	$in0,0,$inp
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm	$in0,$in0,$in0,$inpperm
	vxor	$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor	$in0,$in0,$twk0
	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi	r30,$out,17
	subi	$out,$out,16
	mtctr	$taillen
Loop_xts_enc6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_enc6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0
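# Common 5-way tail subroutine: the blocks in $out0-$out4 arrive already
# xored with their tweaks (callers zero any unused slots), and the final
# rounds also set up the tail load used by the stealing path.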
.align	5
_aesp8_xts_enc5x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_enc5x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	vcipher	$out0,$out0,v26
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vcipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vcipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out1,$out1,v29
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vcipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v30
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vcipher	$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.align	5
_aesp8_xts_decrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_dec_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_dec6x
  2885. .align 5
  2886. Loop_xts_dec6x:
  2887. vncipher $out0,$out0,v24
  2888. vncipher $out1,$out1,v24
  2889. vncipher $out2,$out2,v24
  2890. vncipher $out3,$out3,v24
  2891. vncipher $out4,$out4,v24
  2892. vncipher $out5,$out5,v24
  2893. lvx v24,$x20,$key_ # round[3]
  2894. addi $key_,$key_,0x20
  2895. vncipher $out0,$out0,v25
  2896. vncipher $out1,$out1,v25
  2897. vncipher $out2,$out2,v25
  2898. vncipher $out3,$out3,v25
  2899. vncipher $out4,$out4,v25
  2900. vncipher $out5,$out5,v25
  2901. lvx v25,$x10,$key_ # round[4]
  2902. bdnz Loop_xts_dec6x
	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vxor	$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vxor	$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vxor	$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	vxor	$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_dec6x		# did $len-=96 borrow?
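	# Exit path: $len went negative above, so add 96 back to recover
	# the leftover whole-block byte count (0x00-0x50) and dispatch to
	# the matching 1..5-block tail handler.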
	addic.	$len,$len,0x60
	beq	Lxts_dec6x_zero
	cmpwi	$len,0x20
	blt	Lxts_dec6x_one
	nop
	beq	Lxts_dec6x_two
	cmpwi	$len,0x40
	blt	Lxts_dec6x_three
	nop
	beq	Lxts_dec6x_four

Lxts_dec6x_five:
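	# Five blocks remain; thanks to the $inp rewind above they are
	# already sitting in $in1-$in5.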
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	vxor	$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	vmr	$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	vmr	$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vmr	$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk3
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor	$out0,$in5,$twk0
	nop
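	# One block left: run it through the remaining rounds, one round
	# pair per iteration, mirroring the structure of the 6x loop.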
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec1x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24
	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25
	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26
	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v27
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	mtctr	$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1		# unused tweak
	vmr	$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	vxor	$out0,$in0,$twk2
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
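	# No whole blocks left: if a tail exists, start decrypting it with
	# $twk1 and fall through to the stealing path.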
Lxts_dec6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_dec6x_done

	lvx_u	$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor	$out0,$in0,$twk1
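	# Ciphertext stealing: decrypt the last full block with the later
	# tweak ($twk1), splice the $taillen stolen ciphertext bytes into
	# it via the vsel below, then loop back once more to decrypt the
	# assembled block with $twk0.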
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Lxts_dec6x_steal

	add	$inp,$inp,$taillen
	vncipher	$out0,$out0,v24
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25
	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v26
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk1,$twk1,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0
	vxor	$out0,$out0,$twk0

	subi	r30,$out,1
	mtctr	$taillen
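	# Copy the first $taillen bytes of the block just stored at $out
	# 16 bytes forward: they become the final partial output, while
	# the block at $out itself is rewritten on the extra pass below.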
Loop_xts_dec6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_dec6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0

.align	5
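# _aesp8_xts_dec5x pushes $out0-$out4 through the remaining cipher
# rounds (two per CTR iteration, same key-reload pattern as the main
# loop), finishes each block with its tweak-masked last round key, and
# resets CTR for the caller.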
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_dec5x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr	$rounds
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
}}	}}}
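# Post-process the generated code: evaluate `...` arithmetic, emit the
# constants table as endian-correct .byte runs, and resolve the
# endianness-dependent instructions marked with le?/be?/? prefixes.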
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;