#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [with data
# aligned programmatically, which in turn guarantees exception-
# free execution], but this turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little- and 12x on
# big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
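#
# Typical invocation (illustrative; flavour strings follow the usual
# perlasm convention, e.g. linux32, linux64, linux64le):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s
#
# The first argument drives the ABI-specific mnemonic selection below;
# the second is appended to the "open STDOUT" pipe and handed to
# ppc-xlate.pl as the output file.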
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";
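# Note on notation: "le?"-prefixed instructions below are emitted only
# for little-endian flavours, and "?"-prefixed ones are byte-order
# sensitive; both markers are resolved (and stripped) by ppc-xlate.pl
# before the assembler ever sees them.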
#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text
.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
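# "?rev" tags the constants above for ppc-xlate.pl to byte-reverse on
# little-endian targets; "?asis" data is emitted unchanged.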
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr		#vvvvv distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
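# Each Loop128 iteration derives the next 128-bit round key entirely
# in registers: vperm rotates the last key word and splats it across
# all four lanes [making vcipherlast's ShiftRows a no-op], vcipherlast
# then delivers SubBytes plus the rcon xor in a single instruction,
# and the vsldoi/vxor chain folds in the running xor of the previous
# key words.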
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds
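	# The decryption schedule is the encryption schedule with round
	# keys swapped end for end [vncipher consumes them in reverse
	# order], so the loop below exchanges 16-byte round keys from
	# both ends of the buffer, one pair per iteration.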
Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}

#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
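# $n selects the instruction family: vcipher/vcipherlast for
# encryption, vncipher/vncipherlast for decryption; the two generated
# routines are otherwise identical.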
$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
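# CBC decryption parallelizes because every plaintext block depends
# only on ciphertext already in hand: P[i] = D(C[i]) xor C[i-1].
# Encryption is inherently serial [C[i] = E(P[i] xor C[i-1])], which
# is why only the decrypt path gets this 8x-interleaved variant.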
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"
	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x

.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
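	# subfe. of a register with itself yields CA-1: all zeros if
	# the subtraction above did not borrow [another full 128-byte
	# batch remains], all ones if it did [final batch].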
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}
#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;
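# In CTR mode the cipher only ever encrypts the counter block and the
# result is xored into the data stream: out[i] = in[i] xor E(ctr+i).
# "ctr32" means only the low-order 32-bit word of the counter is
# incremented [vadduwm with a 0...01 constant], matching e.g. GCM.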
  1133. $code.=<<___;
  1134. .globl .${prefix}_ctr32_encrypt_blocks
  1135. .align 5
  1136. .${prefix}_ctr32_encrypt_blocks:
  1137. ${UCMP}i $len,1
  1138. bltlr-
  1139. lis r0,0xfff0
  1140. mfspr $vrsave,256
  1141. mtspr 256,r0
  1142. li $idx,15
  1143. vxor $rndkey0,$rndkey0,$rndkey0
  1144. le?vspltisb $tmp,0x0f
  1145. lvx $ivec,0,$ivp # load [unaligned] iv
  1146. lvsl $inpperm,0,$ivp
  1147. lvx $inptail,$idx,$ivp
  1148. vspltisb $one,1
  1149. le?vxor $inpperm,$inpperm,$tmp
  1150. vperm $ivec,$ivec,$inptail,$inpperm
  1151. vsldoi $one,$rndkey0,$one,1
  1152. neg r11,$inp
  1153. ?lvsl $keyperm,0,$key # prepare for unaligned key
  1154. lwz $rounds,240($key)
  1155. lvsr $inpperm,0,r11 # prepare for unaligned load
  1156. lvx $inptail,0,$inp
  1157. addi $inp,$inp,15 # 15 is not typo
  1158. le?vxor $inpperm,$inpperm,$tmp
  1159. srwi $rounds,$rounds,1
  1160. li $idx,16
  1161. subi $rounds,$rounds,1
  1162. ${UCMP}i $len,8
  1163. bge _aesp8_ctr32_encrypt8x
  1164. ?lvsr $outperm,0,$out # prepare for unaligned store
  1165. vspltisb $outmask,-1
  1166. lvx $outhead,0,$out
  1167. ?vperm $outmask,$rndkey0,$outmask,$outperm
  1168. le?vxor $outperm,$outperm,$tmp
  1169. lvx $rndkey0,0,$key
  1170. mtctr $rounds
  1171. lvx $rndkey1,$idx,$key
  1172. addi $idx,$idx,16
  1173. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1174. vxor $inout,$ivec,$rndkey0
  1175. lvx $rndkey0,$idx,$key
  1176. addi $idx,$idx,16
  1177. b Loop_ctr32_enc
  1178. .align 5
  1179. Loop_ctr32_enc:
  1180. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1181. vcipher $inout,$inout,$rndkey1
  1182. lvx $rndkey1,$idx,$key
  1183. addi $idx,$idx,16
  1184. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1185. vcipher $inout,$inout,$rndkey0
  1186. lvx $rndkey0,$idx,$key
  1187. addi $idx,$idx,16
  1188. bdnz Loop_ctr32_enc
  1189. vadduwm $ivec,$ivec,$one
  1190. vmr $dat,$inptail
  1191. lvx $inptail,0,$inp
  1192. addi $inp,$inp,16
  1193. subic. $len,$len,1 # blocks--
  1194. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1195. vcipher $inout,$inout,$rndkey1
  1196. lvx $rndkey1,$idx,$key
  1197. vperm $dat,$dat,$inptail,$inpperm
  1198. li $idx,16
  1199. ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
  1200. lvx $rndkey0,0,$key
  1201. vxor $dat,$dat,$rndkey1 # last round key
  1202. vcipherlast $inout,$inout,$dat
  1203. lvx $rndkey1,$idx,$key
  1204. addi $idx,$idx,16
  1205. vperm $inout,$inout,$inout,$outperm
  1206. vsel $dat,$outhead,$inout,$outmask
  1207. mtctr $rounds
  1208. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1209. vmr $outhead,$inout
  1210. vxor $inout,$ivec,$rndkey0
  1211. lvx $rndkey0,$idx,$key
  1212. addi $idx,$idx,16
  1213. stvx $dat,0,$out
  1214. addi $out,$out,16
  1215. bne Loop_ctr32_enc
  1216. addi $out,$out,-1
  1217. lvx $inout,0,$out # redundant in aligned case
  1218. vsel $inout,$outhead,$inout,$outmask
  1219. stvx $inout,0,$out
  1220. mtspr 256,$vrsave
  1221. blr
  1222. .long 0
  1223. .byte 0,12,0x14,0,0,0,6,0
  1224. .long 0
  1225. ___
  1226. #########################################################################
  1227. {{ # Optimized CTR procedure #
  1228. my $key_="r11";
  1229. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  1230. $x00=0 if ($flavour =~ /osx/);
  1231. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
  1232. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
  1233. my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
  1234. # v26-v31 last 6 round keys
  1235. my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  1236. my ($two,$three,$four)=($outhead,$outperm,$outmask);
  1237. $code.=<<___;
  1238. .align 5
  1239. _aesp8_ctr32_encrypt8x:
  1240. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  1241. li r10,`$FRAME+8*16+15`
  1242. li r11,`$FRAME+8*16+31`
  1243. stvx v20,r10,$sp # ABI says so
  1244. addi r10,r10,32
  1245. stvx v21,r11,$sp
  1246. addi r11,r11,32
  1247. stvx v22,r10,$sp
  1248. addi r10,r10,32
  1249. stvx v23,r11,$sp
  1250. addi r11,r11,32
  1251. stvx v24,r10,$sp
  1252. addi r10,r10,32
  1253. stvx v25,r11,$sp
  1254. addi r11,r11,32
  1255. stvx v26,r10,$sp
  1256. addi r10,r10,32
  1257. stvx v27,r11,$sp
  1258. addi r11,r11,32
  1259. stvx v28,r10,$sp
  1260. addi r10,r10,32
  1261. stvx v29,r11,$sp
  1262. addi r11,r11,32
  1263. stvx v30,r10,$sp
  1264. stvx v31,r11,$sp
  1265. li r0,-1
  1266. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  1267. li $x10,0x10
  1268. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1269. li $x20,0x20
  1270. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1271. li $x30,0x30
  1272. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1273. li $x40,0x40
  1274. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1275. li $x50,0x50
  1276. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1277. li $x60,0x60
  1278. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1279. li $x70,0x70
  1280. mtspr 256,r0
  1281. subi $rounds,$rounds,3 # -4 in total
  1282. lvx $rndkey0,$x00,$key # load key schedule
  1283. lvx v30,$x10,$key
  1284. addi $key,$key,0x20
  1285. lvx v31,$x00,$key
  1286. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  1287. addi $key_,$sp,$FRAME+15
  1288. mtctr $rounds
Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key
	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]
	vadduwm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4
	vadduwm	$out1,$ivec,$one	# counter values ...
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0
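	# All eight counter blocks are pre-whitened with round key 0 as
	# they are generated, so the main loop starts straight at round 1.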
	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x
	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	and	r0,r0,r11
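	# Branchless tail handling: r0 = ($len < 256) ? $len-256 : 0.  It
	# is added to $inp below, backing the pointer up so the final
	# iteration re-reads the last blocks instead of running past the
	# input buffer.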
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
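	# Subtracting 129 and adding 1 nets $len-=128 while leaving CA
	# clear exactly when fewer than 129 bytes remained, i.e. when this
	# is the last full batch; subfe. below turns that borrow into a flag.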
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm
	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with the last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31
	bne	Lctr32_enc8x_break	# did $len-129 borrow?
	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduwm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds
	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80
	b	Loop_ctr32_enc8x_middle
.align	5
Lctr32_enc8x_break:
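	# $len is now 16*(blocks left) - 128: -0x70 means a single block
	# remains, 0 means exactly eight; dispatch accordingly.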
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven
Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done
.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10
Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}
#########################################################################
{{{ # XTS procedures #
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;
($inp,$idx) = ($idx,$inp); # reassign
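# Per the XTS specification, the initial tweak is the IV encrypted under
# $key2; each subsequent block's tweak is the previous one multiplied by
# alpha in GF(2^128) (see the $eighty7 constant below).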
$code.=<<___;
.globl	.${prefix}_xts_encrypt
.align	5
.${prefix}_xts_encrypt:
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-
	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0
	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven
	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm
	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp
	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds
Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
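	# $eighty7 drives the per-block tweak update, tweak *= alpha over
	# GF(2^128) mod x^128+x^7+x^2+x+1.  With the tweak held least-
	# significant-byte first, vaddubm doubles every byte while the
	# vsrab/vsldoi/vand sequence carries each byte's lost top bit into
	# the next lane (the 0x01 lanes) and folds the overall overflow
	# back into the low byte as 0x87.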
	${UCMP}i	$len,96
	bge	_aesp8_xts_encrypt6x
	andi.	$taillen,$len,15
	subic	r0,$len,32
	subi	$taillen,$taillen,16
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0
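	# Branchless stealing setup: r0 = ($len < 32) ? $taillen-16 : 0,
	# nudging $inp back when fewer than two blocks remain so the
	# upcoming tail load stays within the input.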
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	b	Loop_xts_enc
.align	5
Loop_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_enc
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16
	subic.	$len,$len,16
	beq	Lxts_enc_done
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	subic	r0,$len,32
	subfe	r0,r0,r0
	and	r0,r0,$taillen
	add	$inp,$inp,r0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$output,$output,$rndkey0	# just in case $len<16
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_enc
	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail
	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
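	# Ciphertext stealing: shift the last full ciphertext block forward
	# byte by byte so its leading bytes become the final partial block,
	# then run the merged block through the cipher once more.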
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal
	mtctr	$rounds
	b	Loop_xts_enc		# one more time...
Lxts_enc_done:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
.globl	.${prefix}_xts_decrypt
.align	5
.${prefix}_xts_decrypt:
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-
	lis	r0,0xfff8
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0
	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0
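	# When the length is not block-aligned, hold the last full block
	# back ($len -= 16): decryption-side stealing must process it with
	# the tweak *after* the current one, handled in Ltail_xts_dec.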
	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven
	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm
	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp
	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds
Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16
	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec
.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16
	subic.	$len,$len,16
	beq	Lxts_dec_done
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec
Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp
	subi	$inp,$inp,16
	add	$inp,$inp,$len
	vxor	$inout,$inout,$tweak	# :-(
	vxor	$inout,$inout,$tweak1	# :-)
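	# The block was already xored with $tweak when it was whitened;
	# xoring with $tweak again cancels that, and $tweak1 (the *next*
	# tweak) takes its place: the held-back full block is decrypted
	# with the later tweak, the reassembled final block with $tweak.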
Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	lvsr	$inpperm,0,$len		# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail
	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal
	mtctr	$rounds
	b	Loop_xts_dec		# one more time...
Lxts_dec_done:
	mtspr	256,r12			# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{ # Optimized XTS procedures #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first few round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;
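# Same scheme as the 8x CTR path: six blocks in flight, rounds [1]..[4]
# streamed from a stack copy of the key schedule through the rotating
# v24/v25 pair, and the next batch of tweaks computed in the gaps
# between AES rounds of the current batch.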
$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r0
	li	r7,`$FRAME+8*16+15`
	li	r8,`$FRAME+8*16+31`
	$PUSH	r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r8,$sp
	addi	r8,r8,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r8,$sp
	addi	r8,r8,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r8,$sp
	addi	r8,r8,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r8,$sp
	addi	r8,r8,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r8,$sp
	addi	r8,r8,32
	stvx	v30,r7,$sp
	stvx	v31,r8,$sp
	mr	r7,r0
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0
	subi	$rounds,$rounds,3	# -4 in total
	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds
Load_xts_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_enc_key
	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]
	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp
	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_enc6x
.align	5
Loop_xts_enc6x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc6x
	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	and	r0,r0,$len
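	# Branchless tail: r0 = (the $len-=96 above borrowed) ? $len : 0;
	# it is added to $inp below so the final short batch reloads the
	# trailing blocks rather than reading past the input.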
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in5 are loaded
					# with the last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v27
	vcipher	$out1,$out1,v27
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vxor	$tweak,$tweak,$tmp
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v28
	vcipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v29
	vcipher	$out1,$out1,v29
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vcipher	$out4,$out4,v29
	vcipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out0,$out0,v30
	vcipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v30
	vcipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0
	vcipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	vxor	$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5	# last block might be needed
					# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm
	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp,$x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60
	mtctr	$rounds
	beq	Loop_xts_enc6x		# did $len-=96 borrow?
	addic.	$len,$len,0x60
	beq	Lxts_enc6x_zero
	cmpwi	$len,0x20
	blt	Lxts_enc6x_one
	nop
	beq	Lxts_enc6x_two
	cmpwi	$len,0x40
	blt	Lxts_enc6x_three
	nop
	beq	Lxts_enc6x_four
Lxts_enc6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4
	bl	_aesp8_xts_enc5x
	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2		# unused tweak
	vxor	$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher	$out0,$out0,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc1x
	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	lvsr	$inpperm,0,$taillen
	vcipher	$out0,$out0,v26
	lvx_u	$in0,0,$inp
	vcipher	$out0,$out0,v27
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vcipher	$out0,$out0,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0
	vmr	$twk0,$twk1		# unused tweak
	vxor	$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out		# store output
	addi	$out,$out,0x10
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done
.align	4
Lxts_enc6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_enc6x_done
	add	$inp,$inp,$taillen
	subi	$inp,$inp,16
	lvx_u	$in0,0,$inp
	lvsr	$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm	$in0,$in0,$in0,$inpperm
	vxor	$tmp,$tmp,$twk0
Lxts_enc6x_steal:
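	# Ciphertext stealing: merge the tail input bytes with the kept
	# last ciphertext block ($tmp), shift the stored ciphertext forward
	# byte by byte, then encrypt the merged block via Loop_xts_enc1x.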
	vxor	$in0,$in0,$twk0
	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
	subi	r3,$out,17
	subi	$out,$out,16
	mtctr	$taillen
Loop_xts_enc6x_steal:
	lbzu	r0,1(r3)
	stb	r0,16(r3)
	bdnz	Loop_xts_enc6x_steal
	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_enc1x		# one more time...
.align	4
Lxts_enc6x_done:
	mtlr	r7
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0
.align	5
_aesp8_xts_enc5x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_enc5x
	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31
	vcipher	$out0,$out0,v26
	lvsr	$inpperm,r0,$taillen	# $in5 is no more
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31
	vcipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vcipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31
	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out1,$out1,v29
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vcipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31
	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v30
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vcipher	$out4,$out4,v30
	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.align	5
_aesp8_xts_decrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r0
	li	r7,`$FRAME+8*16+15`
	li	r8,`$FRAME+8*16+31`
	$PUSH	r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r8,$sp
	addi	r8,r8,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r8,$sp
	addi	r8,r8,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r8,$sp
	addi	r8,r8,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r8,$sp
	addi	r8,r8,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r8,$sp
	addi	r8,r8,32
	stvx	v30,r7,$sp
	stvx	v31,r8,$sp
	mr	r7,r0
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0
	subi	$rounds,$rounds,3	# -4 in total
	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds
Load_xts_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_dec_key
	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]
	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp
	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_dec6x
.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec6x
	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	and	r0,r0,$len
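	# Same branchless tail adjustment as in the encrypt path: r0 is
	# either the (negative) remaining-length delta or zero.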
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in5 are loaded
					# with the last "words"
  2883. vxor $in2,$twk2,v31
  2884. vsrab $tmp,$tweak,$seven # next tweak value
  2885. vxor $twk2,$tweak,$rndkey0
  2886. vaddubm $tweak,$tweak,$tweak
  2887. vncipher $out0,$out0,v27
  2888. vncipher $out1,$out1,v27
  2889. vsldoi $tmp,$tmp,$tmp,15
  2890. vncipher $out2,$out2,v27
  2891. vncipher $out3,$out3,v27
  2892. vand $tmp,$tmp,$eighty7
  2893. vncipher $out4,$out4,v27
  2894. vncipher $out5,$out5,v27
  2895. addi $key_,$sp,$FRAME+15 # rewind $key_
  2896. vxor $tweak,$tweak,$tmp
  2897. vncipher $out0,$out0,v28
  2898. vncipher $out1,$out1,v28
  2899. vxor $in3,$twk3,v31
  2900. vsrab $tmp,$tweak,$seven # next tweak value
  2901. vxor $twk3,$tweak,$rndkey0
  2902. vncipher $out2,$out2,v28
  2903. vncipher $out3,$out3,v28
  2904. vaddubm $tweak,$tweak,$tweak
  2905. vsldoi $tmp,$tmp,$tmp,15
  2906. vncipher $out4,$out4,v28
  2907. vncipher $out5,$out5,v28
  2908. lvx v24,$x00,$key_ # re-pre-load round[1]
  2909. vand $tmp,$tmp,$eighty7
  2910. vncipher $out0,$out0,v29
  2911. vncipher $out1,$out1,v29
  2912. vxor $tweak,$tweak,$tmp
  2913. vncipher $out2,$out2,v29
  2914. vncipher $out3,$out3,v29
  2915. vxor $in4,$twk4,v31
  2916. vsrab $tmp,$tweak,$seven # next tweak value
  2917. vxor $twk4,$tweak,$rndkey0
  2918. vncipher $out4,$out4,v29
  2919. vncipher $out5,$out5,v29
  2920. lvx v25,$x10,$key_ # re-pre-load round[2]
  2921. vaddubm $tweak,$tweak,$tweak
  2922. vsldoi $tmp,$tmp,$tmp,15
  2923. vncipher $out0,$out0,v30
  2924. vncipher $out1,$out1,v30
  2925. vand $tmp,$tmp,$eighty7
  2926. vncipher $out2,$out2,v30
  2927. vncipher $out3,$out3,v30
  2928. vxor $tweak,$tweak,$tmp
  2929. vncipher $out4,$out4,v30
  2930. vncipher $out5,$out5,v30
  2931. vxor $in5,$twk5,v31
  2932. vsrab $tmp,$tweak,$seven # next tweak value
  2933. vxor $twk5,$tweak,$rndkey0
  2934. vncipherlast $out0,$out0,$in0
  2935. lvx_u $in0,$x00,$inp # load next input block
  2936. vaddubm $tweak,$tweak,$tweak
  2937. vsldoi $tmp,$tmp,$tmp,15
  2938. vncipherlast $out1,$out1,$in1
  2939. lvx_u $in1,$x10,$inp
  2940. vncipherlast $out2,$out2,$in2
  2941. le?vperm $in0,$in0,$in0,$leperm
  2942. lvx_u $in2,$x20,$inp
  2943. vand $tmp,$tmp,$eighty7
  2944. vncipherlast $out3,$out3,$in3
  2945. le?vperm $in1,$in1,$in1,$leperm
  2946. lvx_u $in3,$x30,$inp
  2947. vncipherlast $out4,$out4,$in4
  2948. le?vperm $in2,$in2,$in2,$leperm
  2949. lvx_u $in4,$x40,$inp
  2950. vxor $tweak,$tweak,$tmp
  2951. vncipherlast $out5,$out5,$in5
  2952. le?vperm $in3,$in3,$in3,$leperm
  2953. lvx_u $in5,$x50,$inp
  2954. addi $inp,$inp,0x60
  2955. le?vperm $in4,$in4,$in4,$leperm
  2956. le?vperm $in5,$in5,$in5,$leperm
  2957. le?vperm $out0,$out0,$out0,$leperm
  2958. le?vperm $out1,$out1,$out1,$leperm
  2959. stvx_u $out0,$x00,$out # store output
  2960. vxor $out0,$in0,$twk0
  2961. le?vperm $out2,$out2,$out2,$leperm
  2962. stvx_u $out1,$x10,$out
  2963. vxor $out1,$in1,$twk1
  2964. le?vperm $out3,$out3,$out3,$leperm
  2965. stvx_u $out2,$x20,$out
  2966. vxor $out2,$in2,$twk2
  2967. le?vperm $out4,$out4,$out4,$leperm
  2968. stvx_u $out3,$x30,$out
  2969. vxor $out3,$in3,$twk3
  2970. le?vperm $out5,$out5,$out5,$leperm
  2971. stvx_u $out4,$x40,$out
  2972. vxor $out4,$in4,$twk4
  2973. stvx_u $out5,$x50,$out
  2974. vxor $out5,$in5,$twk5
  2975. addi $out,$out,0x60
  2976. mtctr $rounds
  2977. beq Loop_xts_dec6x # did $len-=96 borrow?
  2978. addic. $len,$len,0x60
  2979. beq Lxts_dec6x_zero
  2980. cmpwi $len,0x20
  2981. blt Lxts_dec6x_one
  2982. nop
  2983. beq Lxts_dec6x_two
  2984. cmpwi $len,0x40
  2985. blt Lxts_dec6x_three
  2986. nop
  2987. beq Lxts_dec6x_four
Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
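	# Single remaining block: CTR (set from $rounds) steps through
	# the stacked key schedule two rounds per iteration, reloading
	# v24/v25 as it goes.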
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vxor		$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
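
	# Ciphertext stealing: decrypt one more full block under the
	# next tweak and store it, then merge the just-decrypted block
	# with the $taillen-byte partial input, copy the stolen tail
	# bytes out, and run the merged block back through
	# Loop_xts_dec1x under the tweak held back in $twk0.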
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vxor		$twk1,$twk1,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r3,$out,1
	mtctr		$taillen
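
	# Move the stolen tail: copy $taillen bytes of the block just
	# stored at $out forward by 16 to become the short final output
	# block (lbzu pre-increments r3, so each pass moves one byte).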
Loop_xts_dec6x_steal:
	lbzu		r0,1(r3)
	stb		r0,16(r3)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	mtlr		r7
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
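# _aesp8_xts_dec5x runs the remaining AES rounds on $out0-$out4,
# cycling the stacked round keys through v24/v25, then finishes each
# block with vncipherlast against its tweak pre-masked with the last
# round key (v31); $in1-$in4 double as scratch for the masked tweaks.
# It also pre-loads the next input block and resets CTR and $key_ so
# the caller can fall straight into the stealing path if needed.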
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}
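
# Post-processing: evaluate `...` expressions, flatten the constants
# table into endian-neutral .byte lines (applying the ?rev/?inv hints
# for little-endian flavours), and resolve the le?/be?/? instruction
# prefixes for the target endianness before emitting the assembly.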
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;