aesp8-ppc.pl

  1. #! /usr/bin/env perl
  2. # Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # This module implements support for AES instructions as per the PowerISA
  17. # specification version 2.07, first implemented by the POWER8 processor.
  18. # The module is endian-agnostic in the sense that it supports both big-
  19. # and little-endian cases. Data alignment in parallelizable modes is
  20. # handled with VSX loads and stores, which requires the MSR.VSX flag to
  21. # be set. Note also that the ISA specification doesn't prohibit alignment
  22. # exceptions for these instructions on page boundaries. Initially,
  23. # alignment was handled in a pure AltiVec/VMX way [data was aligned
  24. # programmatically, which in turn guarantees exception-free execution],
  25. # but that turned out to hamper performance when vcipher instructions
  26. # are interleaved. It is reckoned that the occasional misalignment
  27. # penalty at a page boundary is on average lower than the additional
  28. # overhead of the pure AltiVec approach.
  29. #
  30. # May 2016
  31. #
  32. # Added XTS subroutine; a 9x improvement was measured on little-endian
  33. # and a 12x improvement on big-endian systems.
  34. #
  35. ######################################################################
  36. # Current large-block performance in cycles per byte processed with
  37. # 128-bit key (less is better).
  38. #
  39. #               CBC en-/decrypt   CTR    XTS
  40. # POWER8[le]       3.96/0.72      0.74   1.1
  41. # POWER8[be]       3.75/0.65      0.66   1.0
  42. # POWER9[le]       4.02/0.86      0.84   1.05
  43. # POWER9[be]       3.99/0.78      0.79   0.97
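#
# The routines generated below are meant to be called from C. For reference,
# a sketch of the expected prototypes (assuming the aes_p8_ prefix chosen
# further down and the standard OpenSSL AES_KEY layout; the declarations used
# elsewhere in the OpenSSL tree are authoritative):
#
#	int  aes_p8_set_encrypt_key(const unsigned char *userKey, const int bits,
#	                            AES_KEY *key);
#	int  aes_p8_set_decrypt_key(const unsigned char *userKey, const int bits,
#	                            AES_KEY *key);
#	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#	                    const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#	                    const AES_KEY *key);
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#	                        size_t length, const AES_KEY *key,
#	                        unsigned char *ivec, const int enc);
#	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
#	                                 size_t blocks, const void *key,
#	                                 const unsigned char *ivec);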
  44. # $output is the last argument if it looks like a file (it has an extension)
  45. # $flavour is the first argument if it doesn't look like a file
  46. $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  47. $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
  48. if ($flavour =~ /64/) {
  49.     $SIZE_T  = 8;
  50.     $LRSAVE  = 2*$SIZE_T;
  51.     $STU     = "stdu";
  52.     $POP     = "ld";
  53.     $PUSH    = "std";
  54.     $UCMP    = "cmpld";
  55.     $SHL     = "sldi";
  56. } elsif ($flavour =~ /32/) {
  57.     $SIZE_T  = 4;
  58.     $LRSAVE  = $SIZE_T;
  59.     $STU     = "stwu";
  60.     $POP     = "lwz";
  61.     $PUSH    = "stw";
  62.     $UCMP    = "cmplw";
  63.     $SHL     = "slwi";
  64. } else { die "nonsense $flavour"; }
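# A sketch of a typical invocation (the exact flavour strings are supplied by
# the OpenSSL build system):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s    # 64-bit little-endian
#	perl aesp8-ppc.pl linux64   aesp8-ppc.s    # 64-bit big-endian
#
# The flavour selects the pointer size and the matching load/store/compare
# mnemonics above; a trailing "le" additionally enables the le?-prefixed
# little-endian code paths via ppc-xlate.pl and the assignment below.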
  65. $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
  66. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  67. ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  68. ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  69. die "can't locate ppc-xlate.pl";
  70. open STDOUT,"| $^X $xlate $flavour \"$output\""
  71. or die "can't call $xlate: $!";
  72. $FRAME=8*$SIZE_T;
  73. $prefix="aes_p8";
  74. $sp="r1";
  75. $vrsave="r12";
  76. #########################################################################
  77. {{{ # Key setup procedures #
  78. my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
  79. my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
  80. my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
  81. $code.=<<___;
  82. .machine "any"
  83. .text
  84. .align 7
  85. rcon:
  86. .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
  87. .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
  88. .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
  89. .long 0,0,0,0 ?asis
  90. .long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
  91. Lconsts:
  92. mflr r0
  93. bcl 20,31,\$+4
  94. mflr $ptr # distance between . and rcon
  95. addi $ptr,$ptr,-0x58
  96. mtlr r0
  97. blr
  98. .long 0
  99. .byte 0,12,0x14,0,0,0,0,0
  100. .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
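# Lconsts returns the run-time address of the rcon table in $ptr: "bcl
# 20,31,\$+4" is a branch-and-link to the immediately following instruction,
# so the mflr picks up the address of that instruction, and the -0x58
# adjustment (0x50 bytes of constants plus the two preceding instructions)
# steps back to rcon. This keeps the module position-independent.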
  101. .globl .${prefix}_set_encrypt_key
  102. .align 5
  103. .${prefix}_set_encrypt_key:
  104. Lset_encrypt_key:
  105. mflr r11
  106. $PUSH r11,$LRSAVE($sp)
  107. li $ptr,-1
  108. ${UCMP}i $inp,0
  109. beq- Lenc_key_abort # if ($inp==0) return -1;
  110. ${UCMP}i $out,0
  111. beq- Lenc_key_abort # if ($out==0) return -1;
  112. li $ptr,-2
  113. cmpwi $bits,128
  114. blt- Lenc_key_abort
  115. cmpwi $bits,256
  116. bgt- Lenc_key_abort
  117. andi. r0,$bits,0x3f
  118. bne- Lenc_key_abort
  119. lis r0,0xfff0
  120. mfspr $vrsave,256
  121. mtspr 256,r0
  122. bl Lconsts
  123. mtlr r11
  124. neg r9,$inp
  125. lvx $in0,0,$inp
  126. addi $inp,$inp,15 # 15 is not typo
  127. lvsr $key,0,r9 # borrow $key
  128. li r8,0x20
  129. cmpwi $bits,192
  130. lvx $in1,0,$inp
  131. le?vspltisb $mask,0x0f # borrow $mask
  132. lvx $rcon,0,$ptr
  133. le?vxor $key,$key,$mask # adjust for byte swap
  134. lvx $mask,r8,$ptr
  135. addi $ptr,$ptr,0x10
  136. vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
  137. li $cnt,8
  138. vxor $zero,$zero,$zero
  139. mtctr $cnt
  140. ?lvsr $outperm,0,$out
  141. vspltisb $outmask,-1
  142. lvx $outhead,0,$out
  143. ?vperm $outmask,$zero,$outmask,$outperm
  144. blt Loop128
  145. addi $inp,$inp,8
  146. beq L192
  147. addi $inp,$inp,8
  148. b L256
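# Each pass through Loop128 derives one new 128-bit round key. The vperm with
# $mask ("rotate-n-splat") broadcasts a rotated copy of the last key word into
# all four lanes, and vcipherlast against $rcon then leaves SubWord(RotWord(w3))
# xor rcon in every lane. The three vsldoi/vxor steps compute the running xor
# of the previous key words, so a single vxor with that splatted value yields
# all four new words at once. For reference, one expansion step in plain C (a
# sketch; SubWord/RotWord are the usual AES helpers, not defined in this file):
#
#	t  = SubWord(RotWord(w[3])) ^ rcon;
#	w4 = w[0] ^ t;
#	w5 = w[1] ^ w4;
#	w6 = w[2] ^ w5;
#	w7 = w[3] ^ w6;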
  149. .align 4
  150. Loop128:
  151. vperm $key,$in0,$in0,$mask # rotate-n-splat
  152. vsldoi $tmp,$zero,$in0,12 # >>32
  153. vperm $outtail,$in0,$in0,$outperm # rotate
  154. vsel $stage,$outhead,$outtail,$outmask
  155. vmr $outhead,$outtail
  156. vcipherlast $key,$key,$rcon
  157. stvx $stage,0,$out
  158. addi $out,$out,16
  159. vxor $in0,$in0,$tmp
  160. vsldoi $tmp,$zero,$tmp,12 # >>32
  161. vxor $in0,$in0,$tmp
  162. vsldoi $tmp,$zero,$tmp,12 # >>32
  163. vxor $in0,$in0,$tmp
  164. vadduwm $rcon,$rcon,$rcon
  165. vxor $in0,$in0,$key
  166. bdnz Loop128
  167. lvx $rcon,0,$ptr # last two round keys
  168. vperm $key,$in0,$in0,$mask # rotate-n-splat
  169. vsldoi $tmp,$zero,$in0,12 # >>32
  170. vperm $outtail,$in0,$in0,$outperm # rotate
  171. vsel $stage,$outhead,$outtail,$outmask
  172. vmr $outhead,$outtail
  173. vcipherlast $key,$key,$rcon
  174. stvx $stage,0,$out
  175. addi $out,$out,16
  176. vxor $in0,$in0,$tmp
  177. vsldoi $tmp,$zero,$tmp,12 # >>32
  178. vxor $in0,$in0,$tmp
  179. vsldoi $tmp,$zero,$tmp,12 # >>32
  180. vxor $in0,$in0,$tmp
  181. vadduwm $rcon,$rcon,$rcon
  182. vxor $in0,$in0,$key
  183. vperm $key,$in0,$in0,$mask # rotate-n-splat
  184. vsldoi $tmp,$zero,$in0,12 # >>32
  185. vperm $outtail,$in0,$in0,$outperm # rotate
  186. vsel $stage,$outhead,$outtail,$outmask
  187. vmr $outhead,$outtail
  188. vcipherlast $key,$key,$rcon
  189. stvx $stage,0,$out
  190. addi $out,$out,16
  191. vxor $in0,$in0,$tmp
  192. vsldoi $tmp,$zero,$tmp,12 # >>32
  193. vxor $in0,$in0,$tmp
  194. vsldoi $tmp,$zero,$tmp,12 # >>32
  195. vxor $in0,$in0,$tmp
  196. vxor $in0,$in0,$key
  197. vperm $outtail,$in0,$in0,$outperm # rotate
  198. vsel $stage,$outhead,$outtail,$outmask
  199. vmr $outhead,$outtail
  200. stvx $stage,0,$out
  201. addi $inp,$out,15 # 15 is not typo
  202. addi $out,$out,0x50
  203. li $rounds,10
  204. b Ldone
  205. .align 4
  206. L192:
  207. lvx $tmp,0,$inp
  208. li $cnt,4
  209. vperm $outtail,$in0,$in0,$outperm # rotate
  210. vsel $stage,$outhead,$outtail,$outmask
  211. vmr $outhead,$outtail
  212. stvx $stage,0,$out
  213. addi $out,$out,16
  214. vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
  215. vspltisb $key,8 # borrow $key
  216. mtctr $cnt
  217. vsububm $mask,$mask,$key # adjust the mask
  218. Loop192:
  219. vperm $key,$in1,$in1,$mask # rotate-n-splat
  220. vsldoi $tmp,$zero,$in0,12 # >>32
  221. vcipherlast $key,$key,$rcon
  222. vxor $in0,$in0,$tmp
  223. vsldoi $tmp,$zero,$tmp,12 # >>32
  224. vxor $in0,$in0,$tmp
  225. vsldoi $tmp,$zero,$tmp,12 # >>32
  226. vxor $in0,$in0,$tmp
  227. vsldoi $stage,$zero,$in1,8
  228. vspltw $tmp,$in0,3
  229. vxor $tmp,$tmp,$in1
  230. vsldoi $in1,$zero,$in1,12 # >>32
  231. vadduwm $rcon,$rcon,$rcon
  232. vxor $in1,$in1,$tmp
  233. vxor $in0,$in0,$key
  234. vxor $in1,$in1,$key
  235. vsldoi $stage,$stage,$in0,8
  236. vperm $key,$in1,$in1,$mask # rotate-n-splat
  237. vsldoi $tmp,$zero,$in0,12 # >>32
  238. vperm $outtail,$stage,$stage,$outperm # rotate
  239. vsel $stage,$outhead,$outtail,$outmask
  240. vmr $outhead,$outtail
  241. vcipherlast $key,$key,$rcon
  242. stvx $stage,0,$out
  243. addi $out,$out,16
  244. vsldoi $stage,$in0,$in1,8
  245. vxor $in0,$in0,$tmp
  246. vsldoi $tmp,$zero,$tmp,12 # >>32
  247. vperm $outtail,$stage,$stage,$outperm # rotate
  248. vsel $stage,$outhead,$outtail,$outmask
  249. vmr $outhead,$outtail
  250. vxor $in0,$in0,$tmp
  251. vsldoi $tmp,$zero,$tmp,12 # >>32
  252. vxor $in0,$in0,$tmp
  253. stvx $stage,0,$out
  254. addi $out,$out,16
  255. vspltw $tmp,$in0,3
  256. vxor $tmp,$tmp,$in1
  257. vsldoi $in1,$zero,$in1,12 # >>32
  258. vadduwm $rcon,$rcon,$rcon
  259. vxor $in1,$in1,$tmp
  260. vxor $in0,$in0,$key
  261. vxor $in1,$in1,$key
  262. vperm $outtail,$in0,$in0,$outperm # rotate
  263. vsel $stage,$outhead,$outtail,$outmask
  264. vmr $outhead,$outtail
  265. stvx $stage,0,$out
  266. addi $inp,$out,15 # 15 is not typo
  267. addi $out,$out,16
  268. bdnz Loop192
  269. li $rounds,12
  270. addi $out,$out,0x20
  271. b Ldone
  272. .align 4
  273. L256:
  274. lvx $tmp,0,$inp
  275. li $cnt,7
  276. li $rounds,14
  277. vperm $outtail,$in0,$in0,$outperm # rotate
  278. vsel $stage,$outhead,$outtail,$outmask
  279. vmr $outhead,$outtail
  280. stvx $stage,0,$out
  281. addi $out,$out,16
  282. vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
  283. mtctr $cnt
  284. Loop256:
  285. vperm $key,$in1,$in1,$mask # rotate-n-splat
  286. vsldoi $tmp,$zero,$in0,12 # >>32
  287. vperm $outtail,$in1,$in1,$outperm # rotate
  288. vsel $stage,$outhead,$outtail,$outmask
  289. vmr $outhead,$outtail
  290. vcipherlast $key,$key,$rcon
  291. stvx $stage,0,$out
  292. addi $out,$out,16
  293. vxor $in0,$in0,$tmp
  294. vsldoi $tmp,$zero,$tmp,12 # >>32
  295. vxor $in0,$in0,$tmp
  296. vsldoi $tmp,$zero,$tmp,12 # >>32
  297. vxor $in0,$in0,$tmp
  298. vadduwm $rcon,$rcon,$rcon
  299. vxor $in0,$in0,$key
  300. vperm $outtail,$in0,$in0,$outperm # rotate
  301. vsel $stage,$outhead,$outtail,$outmask
  302. vmr $outhead,$outtail
  303. stvx $stage,0,$out
  304. addi $inp,$out,15 # 15 is not typo
  305. addi $out,$out,16
  306. bdz Ldone
  307. vspltw $key,$in0,3 # just splat
  308. vsldoi $tmp,$zero,$in1,12 # >>32
  309. vsbox $key,$key
  310. vxor $in1,$in1,$tmp
  311. vsldoi $tmp,$zero,$tmp,12 # >>32
  312. vxor $in1,$in1,$tmp
  313. vsldoi $tmp,$zero,$tmp,12 # >>32
  314. vxor $in1,$in1,$tmp
  315. vxor $in1,$in1,$key
  316. b Loop256
  317. .align 4
  318. Ldone:
  319. lvx $in1,0,$inp # redundant in aligned case
  320. vsel $in1,$outhead,$in1,$outmask
  321. stvx $in1,0,$inp
  322. li $ptr,0
  323. mtspr 256,$vrsave
  324. stw $rounds,0($out)
  325. Lenc_key_abort:
  326. mr r3,$ptr
  327. blr
  328. .long 0
  329. .byte 0,12,0x14,1,0,0,3,0
  330. .long 0
  331. .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
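# The schedule written above matches OpenSSL's AES_KEY layout: rounds+1
# 16-byte round keys starting at offset 0 of the output buffer, with the
# round count stored as a 32-bit integer at byte offset 240, which is where
# the encrypt/decrypt routines below reload it (lwz ...,240 of the key
# pointer).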
  332. .globl .${prefix}_set_decrypt_key
  333. .align 5
  334. .${prefix}_set_decrypt_key:
  335. $STU $sp,-$FRAME($sp)
  336. mflr r10
  337. $PUSH r10,$FRAME+$LRSAVE($sp)
  338. bl Lset_encrypt_key
  339. mtlr r10
  340. cmpwi r3,0
  341. bne- Ldec_key_abort
  342. slwi $cnt,$rounds,4
  343. subi $inp,$out,240 # first round key
  344. srwi $rounds,$rounds,1
  345. add $out,$inp,$cnt # last round key
  346. mtctr $rounds
  347. Ldeckey:
  348. lwz r0, 0($inp)
  349. lwz r6, 4($inp)
  350. lwz r7, 8($inp)
  351. lwz r8, 12($inp)
  352. addi $inp,$inp,16
  353. lwz r9, 0($out)
  354. lwz r10,4($out)
  355. lwz r11,8($out)
  356. lwz r12,12($out)
  357. stw r0, 0($out)
  358. stw r6, 4($out)
  359. stw r7, 8($out)
  360. stw r8, 12($out)
  361. subi $out,$out,16
  362. stw r9, -16($inp)
  363. stw r10,-12($inp)
  364. stw r11,-8($inp)
  365. stw r12,-4($inp)
  366. bdnz Ldeckey
  367. xor r3,r3,r3 # return value
  368. Ldec_key_abort:
  369. addi $sp,$sp,$FRAME
  370. blr
  371. .long 0
  372. .byte 0,12,4,1,0x80,0,3,0
  373. .long 0
  374. .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
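# Note that set_decrypt_key simply runs the encrypt key schedule and then, in
# the Ldeckey loop above, swaps the 16-byte round keys end-for-end in place.
# No InvMixColumns transform is applied to the keys; the vncipher-based
# decrypt paths below are written to work with the reversed encrypt schedule
# directly.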
  375. ___
  376. }}}
  377. #########################################################################
  378. {{{ # Single block en- and decrypt procedures #
  379. sub gen_block () {
  380. my $dir = shift;
  381. my $n = $dir eq "de" ? "n" : "";
  382. my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
  383. $code.=<<___;
  384. .globl .${prefix}_${dir}crypt
  385. .align 5
  386. .${prefix}_${dir}crypt:
  387. lwz $rounds,240($key)
  388. lis r0,0xfc00
  389. mfspr $vrsave,256
  390. li $idx,15 # 15 is not typo
  391. mtspr 256,r0
  392. lvx v0,0,$inp
  393. neg r11,$out
  394. lvx v1,$idx,$inp
  395. lvsl v2,0,$inp # inpperm
  396. le?vspltisb v4,0x0f
  397. ?lvsl v3,0,r11 # outperm
  398. le?vxor v2,v2,v4
  399. li $idx,16
  400. vperm v0,v0,v1,v2 # align [and byte swap in LE]
  401. lvx v1,0,$key
  402. ?lvsl v5,0,$key # keyperm
  403. srwi $rounds,$rounds,1
  404. lvx v2,$idx,$key
  405. addi $idx,$idx,16
  406. subi $rounds,$rounds,1
  407. ?vperm v1,v1,v2,v5 # align round key
  408. vxor v0,v0,v1
  409. lvx v1,$idx,$key
  410. addi $idx,$idx,16
  411. mtctr $rounds
  412. Loop_${dir}c:
  413. ?vperm v2,v2,v1,v5
  414. v${n}cipher v0,v0,v2
  415. lvx v2,$idx,$key
  416. addi $idx,$idx,16
  417. ?vperm v1,v1,v2,v5
  418. v${n}cipher v0,v0,v1
  419. lvx v1,$idx,$key
  420. addi $idx,$idx,16
  421. bdnz Loop_${dir}c
  422. ?vperm v2,v2,v1,v5
  423. v${n}cipher v0,v0,v2
  424. lvx v2,$idx,$key
  425. ?vperm v1,v1,v2,v5
  426. v${n}cipherlast v0,v0,v1
  427. vspltisb v2,-1
  428. vxor v1,v1,v1
  429. li $idx,15 # 15 is not typo
  430. ?vperm v2,v1,v2,v3 # outmask
  431. le?vxor v3,v3,v4
  432. lvx v1,0,$out # outhead
  433. vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
  434. vsel v1,v1,v0,v2
  435. lvx v4,$idx,$out
  436. stvx v1,0,$out
  437. vsel v0,v0,v4,v2
  438. stvx v0,$idx,$out
  439. mtspr 256,$vrsave
  440. blr
  441. .long 0
  442. .byte 0,12,0x14,0,0,0,3,0
  443. .long 0
  444. .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
  445. ___
  446. }
  447. &gen_block("en");
  448. &gen_block("de");
  449. }}}
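# Hypothetical single-block usage from C (a sketch, assuming <openssl/aes.h>
# for the AES_KEY type; error handling abbreviated):
#
#	AES_KEY ks;
#	unsigned char in[16], out[16];
#	if (aes_p8_set_encrypt_key(user_key, 128, &ks) == 0)
#		aes_p8_encrypt(in, out, &ks);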
  450. #########################################################################
  451. {{{ # CBC en- and decrypt procedures #
  452. my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
  453. my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
  454. my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
  455. map("v$_",(4..10));
  456. $code.=<<___;
  457. .globl .${prefix}_cbc_encrypt
  458. .align 5
  459. .${prefix}_cbc_encrypt:
  460. ${UCMP}i $len,16
  461. bltlr-
  462. cmpwi $enc,0 # test direction
  463. lis r0,0xffe0
  464. mfspr $vrsave,256
  465. mtspr 256,r0
  466. li $idx,15
  467. vxor $rndkey0,$rndkey0,$rndkey0
  468. le?vspltisb $tmp,0x0f
  469. lvx $ivec,0,$ivp # load [unaligned] iv
  470. lvsl $inpperm,0,$ivp
  471. lvx $inptail,$idx,$ivp
  472. le?vxor $inpperm,$inpperm,$tmp
  473. vperm $ivec,$ivec,$inptail,$inpperm
  474. neg r11,$inp
  475. ?lvsl $keyperm,0,$key # prepare for unaligned key
  476. lwz $rounds,240($key)
  477. lvsr $inpperm,0,r11 # prepare for unaligned load
  478. lvx $inptail,0,$inp
  479. addi $inp,$inp,15 # 15 is not typo
  480. le?vxor $inpperm,$inpperm,$tmp
  481. ?lvsr $outperm,0,$out # prepare for unaligned store
  482. vspltisb $outmask,-1
  483. lvx $outhead,0,$out
  484. ?vperm $outmask,$rndkey0,$outmask,$outperm
  485. le?vxor $outperm,$outperm,$tmp
  486. srwi $rounds,$rounds,1
  487. li $idx,16
  488. subi $rounds,$rounds,1
  489. beq Lcbc_dec
  490. Lcbc_enc:
  491. vmr $inout,$inptail
  492. lvx $inptail,0,$inp
  493. addi $inp,$inp,16
  494. mtctr $rounds
  495. subi $len,$len,16 # len-=16
  496. lvx $rndkey0,0,$key
  497. vperm $inout,$inout,$inptail,$inpperm
  498. lvx $rndkey1,$idx,$key
  499. addi $idx,$idx,16
  500. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  501. vxor $inout,$inout,$rndkey0
  502. lvx $rndkey0,$idx,$key
  503. addi $idx,$idx,16
  504. vxor $inout,$inout,$ivec
  505. Loop_cbc_enc:
  506. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  507. vcipher $inout,$inout,$rndkey1
  508. lvx $rndkey1,$idx,$key
  509. addi $idx,$idx,16
  510. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  511. vcipher $inout,$inout,$rndkey0
  512. lvx $rndkey0,$idx,$key
  513. addi $idx,$idx,16
  514. bdnz Loop_cbc_enc
  515. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  516. vcipher $inout,$inout,$rndkey1
  517. lvx $rndkey1,$idx,$key
  518. li $idx,16
  519. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  520. vcipherlast $ivec,$inout,$rndkey0
  521. ${UCMP}i $len,16
  522. vperm $tmp,$ivec,$ivec,$outperm
  523. vsel $inout,$outhead,$tmp,$outmask
  524. vmr $outhead,$tmp
  525. stvx $inout,0,$out
  526. addi $out,$out,16
  527. bge Lcbc_enc
  528. b Lcbc_done
  529. .align 4
  530. Lcbc_dec:
  531. ${UCMP}i $len,128
  532. bge _aesp8_cbc_decrypt8x
  533. vmr $tmp,$inptail
  534. lvx $inptail,0,$inp
  535. addi $inp,$inp,16
  536. mtctr $rounds
  537. subi $len,$len,16 # len-=16
  538. lvx $rndkey0,0,$key
  539. vperm $tmp,$tmp,$inptail,$inpperm
  540. lvx $rndkey1,$idx,$key
  541. addi $idx,$idx,16
  542. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  543. vxor $inout,$tmp,$rndkey0
  544. lvx $rndkey0,$idx,$key
  545. addi $idx,$idx,16
  546. Loop_cbc_dec:
  547. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  548. vncipher $inout,$inout,$rndkey1
  549. lvx $rndkey1,$idx,$key
  550. addi $idx,$idx,16
  551. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  552. vncipher $inout,$inout,$rndkey0
  553. lvx $rndkey0,$idx,$key
  554. addi $idx,$idx,16
  555. bdnz Loop_cbc_dec
  556. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  557. vncipher $inout,$inout,$rndkey1
  558. lvx $rndkey1,$idx,$key
  559. li $idx,16
  560. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  561. vncipherlast $inout,$inout,$rndkey0
  562. ${UCMP}i $len,16
  563. vxor $inout,$inout,$ivec
  564. vmr $ivec,$tmp
  565. vperm $tmp,$inout,$inout,$outperm
  566. vsel $inout,$outhead,$tmp,$outmask
  567. vmr $outhead,$tmp
  568. stvx $inout,0,$out
  569. addi $out,$out,16
  570. bge Lcbc_dec
  571. Lcbc_done:
  572. addi $out,$out,-1
  573. lvx $inout,0,$out # redundant in aligned case
  574. vsel $inout,$outhead,$inout,$outmask
  575. stvx $inout,0,$out
  576. neg $enc,$ivp # write [unaligned] iv
  577. li $idx,15 # 15 is not typo
  578. vxor $rndkey0,$rndkey0,$rndkey0
  579. vspltisb $outmask,-1
  580. le?vspltisb $tmp,0x0f
  581. ?lvsl $outperm,0,$enc
  582. ?vperm $outmask,$rndkey0,$outmask,$outperm
  583. le?vxor $outperm,$outperm,$tmp
  584. lvx $outhead,0,$ivp
  585. vperm $ivec,$ivec,$ivec,$outperm
  586. vsel $inout,$outhead,$ivec,$outmask
  587. lvx $inptail,$idx,$ivp
  588. stvx $inout,0,$ivp
  589. vsel $inout,$ivec,$inptail,$outmask
  590. stvx $inout,$idx,$ivp
  591. mtspr 256,$vrsave
  592. blr
  593. .long 0
  594. .byte 0,12,0x14,0,0,0,6,0
  595. .long 0
  596. ___
  597. #########################################################################
  598. {{ # Optimized CBC decrypt procedure #
  599. my $key_="r11";
  600. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  601. $x00=0 if ($flavour =~ /osx/);
  602. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
  603. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
  604. my $rndkey0="v23"; # v24-v25 rotating buffer for the first round keys
  605. # v26-v31 last 6 round keys
  606. my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  607. $code.=<<___;
  608. .align 5
  609. _aesp8_cbc_decrypt8x:
  610. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  611. li r10,`$FRAME+8*16+15`
  612. li r11,`$FRAME+8*16+31`
  613. stvx v20,r10,$sp # ABI says so
  614. addi r10,r10,32
  615. stvx v21,r11,$sp
  616. addi r11,r11,32
  617. stvx v22,r10,$sp
  618. addi r10,r10,32
  619. stvx v23,r11,$sp
  620. addi r11,r11,32
  621. stvx v24,r10,$sp
  622. addi r10,r10,32
  623. stvx v25,r11,$sp
  624. addi r11,r11,32
  625. stvx v26,r10,$sp
  626. addi r10,r10,32
  627. stvx v27,r11,$sp
  628. addi r11,r11,32
  629. stvx v28,r10,$sp
  630. addi r10,r10,32
  631. stvx v29,r11,$sp
  632. addi r11,r11,32
  633. stvx v30,r10,$sp
  634. stvx v31,r11,$sp
  635. li r0,-1
  636. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  637. li $x10,0x10
  638. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  639. li $x20,0x20
  640. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  641. li $x30,0x30
  642. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  643. li $x40,0x40
  644. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  645. li $x50,0x50
  646. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  647. li $x60,0x60
  648. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  649. li $x70,0x70
  650. mtspr 256,r0
  651. subi $rounds,$rounds,3 # -4 in total
  652. subi $len,$len,128 # bias
  653. lvx $rndkey0,$x00,$key # load key schedule
  654. lvx v30,$x10,$key
  655. addi $key,$key,0x20
  656. lvx v31,$x00,$key
  657. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  658. addi $key_,$sp,$FRAME+15
  659. mtctr $rounds
  660. Load_cbc_dec_key:
  661. ?vperm v24,v30,v31,$keyperm
  662. lvx v30,$x10,$key
  663. addi $key,$key,0x20
  664. stvx v24,$x00,$key_ # off-load round[1]
  665. ?vperm v25,v31,v30,$keyperm
  666. lvx v31,$x00,$key
  667. stvx v25,$x10,$key_ # off-load round[2]
  668. addi $key_,$key_,0x20
  669. bdnz Load_cbc_dec_key
  670. lvx v26,$x10,$key
  671. ?vperm v24,v30,v31,$keyperm
  672. lvx v27,$x20,$key
  673. stvx v24,$x00,$key_ # off-load round[3]
  674. ?vperm v25,v31,v26,$keyperm
  675. lvx v28,$x30,$key
  676. stvx v25,$x10,$key_ # off-load round[4]
  677. addi $key_,$sp,$FRAME+15 # rewind $key_
  678. ?vperm v26,v26,v27,$keyperm
  679. lvx v29,$x40,$key
  680. ?vperm v27,v27,v28,$keyperm
  681. lvx v30,$x50,$key
  682. ?vperm v28,v28,v29,$keyperm
  683. lvx v31,$x60,$key
  684. ?vperm v29,v29,v30,$keyperm
  685. lvx $out0,$x70,$key # borrow $out0
  686. ?vperm v30,v30,v31,$keyperm
  687. lvx v24,$x00,$key_ # pre-load round[1]
  688. ?vperm v31,v31,$out0,$keyperm
  689. lvx v25,$x10,$key_ # pre-load round[2]
  690. #lvx $inptail,0,$inp # "caller" already did this
  691. #addi $inp,$inp,15 # 15 is not typo
  692. subi $inp,$inp,15 # undo "caller"
  693. le?li $idx,8
  694. lvx_u $in0,$x00,$inp # load first 8 "words"
  695. le?lvsl $inpperm,0,$idx
  696. le?vspltisb $tmp,0x0f
  697. lvx_u $in1,$x10,$inp
  698. le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
  699. lvx_u $in2,$x20,$inp
  700. le?vperm $in0,$in0,$in0,$inpperm
  701. lvx_u $in3,$x30,$inp
  702. le?vperm $in1,$in1,$in1,$inpperm
  703. lvx_u $in4,$x40,$inp
  704. le?vperm $in2,$in2,$in2,$inpperm
  705. vxor $out0,$in0,$rndkey0
  706. lvx_u $in5,$x50,$inp
  707. le?vperm $in3,$in3,$in3,$inpperm
  708. vxor $out1,$in1,$rndkey0
  709. lvx_u $in6,$x60,$inp
  710. le?vperm $in4,$in4,$in4,$inpperm
  711. vxor $out2,$in2,$rndkey0
  712. lvx_u $in7,$x70,$inp
  713. addi $inp,$inp,0x80
  714. le?vperm $in5,$in5,$in5,$inpperm
  715. vxor $out3,$in3,$rndkey0
  716. le?vperm $in6,$in6,$in6,$inpperm
  717. vxor $out4,$in4,$rndkey0
  718. le?vperm $in7,$in7,$in7,$inpperm
  719. vxor $out5,$in5,$rndkey0
  720. vxor $out6,$in6,$rndkey0
  721. vxor $out7,$in7,$rndkey0
  722. mtctr $rounds
  723. b Loop_cbc_dec8x
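# The 8x path keeps eight blocks in flight so that the latency of each
# vncipher is hidden behind the other seven independent streams. Only two
# round keys at a time live in v24/v25 (refilled each iteration from the
# stack copy built above), while v26-v31 hold the last six round keys for
# the whole call.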
  724. .align 5
  725. Loop_cbc_dec8x:
  726. vncipher $out0,$out0,v24
  727. vncipher $out1,$out1,v24
  728. vncipher $out2,$out2,v24
  729. vncipher $out3,$out3,v24
  730. vncipher $out4,$out4,v24
  731. vncipher $out5,$out5,v24
  732. vncipher $out6,$out6,v24
  733. vncipher $out7,$out7,v24
  734. lvx v24,$x20,$key_ # round[3]
  735. addi $key_,$key_,0x20
  736. vncipher $out0,$out0,v25
  737. vncipher $out1,$out1,v25
  738. vncipher $out2,$out2,v25
  739. vncipher $out3,$out3,v25
  740. vncipher $out4,$out4,v25
  741. vncipher $out5,$out5,v25
  742. vncipher $out6,$out6,v25
  743. vncipher $out7,$out7,v25
  744. lvx v25,$x10,$key_ # round[4]
  745. bdnz Loop_cbc_dec8x
  746. subic $len,$len,128 # $len-=128
  747. vncipher $out0,$out0,v24
  748. vncipher $out1,$out1,v24
  749. vncipher $out2,$out2,v24
  750. vncipher $out3,$out3,v24
  751. vncipher $out4,$out4,v24
  752. vncipher $out5,$out5,v24
  753. vncipher $out6,$out6,v24
  754. vncipher $out7,$out7,v24
  755. subfe. r0,r0,r0 # borrow?-1:0
  756. vncipher $out0,$out0,v25
  757. vncipher $out1,$out1,v25
  758. vncipher $out2,$out2,v25
  759. vncipher $out3,$out3,v25
  760. vncipher $out4,$out4,v25
  761. vncipher $out5,$out5,v25
  762. vncipher $out6,$out6,v25
  763. vncipher $out7,$out7,v25
  764. and r0,r0,$len
  765. vncipher $out0,$out0,v26
  766. vncipher $out1,$out1,v26
  767. vncipher $out2,$out2,v26
  768. vncipher $out3,$out3,v26
  769. vncipher $out4,$out4,v26
  770. vncipher $out5,$out5,v26
  771. vncipher $out6,$out6,v26
  772. vncipher $out7,$out7,v26
  773. add $inp,$inp,r0 # $inp is adjusted in such
  774. # way that at exit from the
  775. # loop inX-in7 are loaded
  776. # with last "words"
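# (The subic/subfe./and sequence above leaves r0 = 0 while at least 128
# bytes remain, and r0 = the negative shortfall otherwise, so on the final
# iteration the input pointer is pulled back just enough that the eight
# lvx_u loads below end exactly at the last input block.)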
  777. vncipher $out0,$out0,v27
  778. vncipher $out1,$out1,v27
  779. vncipher $out2,$out2,v27
  780. vncipher $out3,$out3,v27
  781. vncipher $out4,$out4,v27
  782. vncipher $out5,$out5,v27
  783. vncipher $out6,$out6,v27
  784. vncipher $out7,$out7,v27
  785. addi $key_,$sp,$FRAME+15 # rewind $key_
  786. vncipher $out0,$out0,v28
  787. vncipher $out1,$out1,v28
  788. vncipher $out2,$out2,v28
  789. vncipher $out3,$out3,v28
  790. vncipher $out4,$out4,v28
  791. vncipher $out5,$out5,v28
  792. vncipher $out6,$out6,v28
  793. vncipher $out7,$out7,v28
  794. lvx v24,$x00,$key_ # re-pre-load round[1]
  795. vncipher $out0,$out0,v29
  796. vncipher $out1,$out1,v29
  797. vncipher $out2,$out2,v29
  798. vncipher $out3,$out3,v29
  799. vncipher $out4,$out4,v29
  800. vncipher $out5,$out5,v29
  801. vncipher $out6,$out6,v29
  802. vncipher $out7,$out7,v29
  803. lvx v25,$x10,$key_ # re-pre-load round[2]
  804. vncipher $out0,$out0,v30
  805. vxor $ivec,$ivec,v31 # xor with last round key
  806. vncipher $out1,$out1,v30
  807. vxor $in0,$in0,v31
  808. vncipher $out2,$out2,v30
  809. vxor $in1,$in1,v31
  810. vncipher $out3,$out3,v30
  811. vxor $in2,$in2,v31
  812. vncipher $out4,$out4,v30
  813. vxor $in3,$in3,v31
  814. vncipher $out5,$out5,v30
  815. vxor $in4,$in4,v31
  816. vncipher $out6,$out6,v30
  817. vxor $in5,$in5,v31
  818. vncipher $out7,$out7,v30
  819. vxor $in6,$in6,v31
  820. vncipherlast $out0,$out0,$ivec
  821. vncipherlast $out1,$out1,$in0
  822. lvx_u $in0,$x00,$inp # load next input block
  823. vncipherlast $out2,$out2,$in1
  824. lvx_u $in1,$x10,$inp
  825. vncipherlast $out3,$out3,$in2
  826. le?vperm $in0,$in0,$in0,$inpperm
  827. lvx_u $in2,$x20,$inp
  828. vncipherlast $out4,$out4,$in3
  829. le?vperm $in1,$in1,$in1,$inpperm
  830. lvx_u $in3,$x30,$inp
  831. vncipherlast $out5,$out5,$in4
  832. le?vperm $in2,$in2,$in2,$inpperm
  833. lvx_u $in4,$x40,$inp
  834. vncipherlast $out6,$out6,$in5
  835. le?vperm $in3,$in3,$in3,$inpperm
  836. lvx_u $in5,$x50,$inp
  837. vncipherlast $out7,$out7,$in6
  838. le?vperm $in4,$in4,$in4,$inpperm
  839. lvx_u $in6,$x60,$inp
  840. vmr $ivec,$in7
  841. le?vperm $in5,$in5,$in5,$inpperm
  842. lvx_u $in7,$x70,$inp
  843. addi $inp,$inp,0x80
  844. le?vperm $out0,$out0,$out0,$inpperm
  845. le?vperm $out1,$out1,$out1,$inpperm
  846. stvx_u $out0,$x00,$out
  847. le?vperm $in6,$in6,$in6,$inpperm
  848. vxor $out0,$in0,$rndkey0
  849. le?vperm $out2,$out2,$out2,$inpperm
  850. stvx_u $out1,$x10,$out
  851. le?vperm $in7,$in7,$in7,$inpperm
  852. vxor $out1,$in1,$rndkey0
  853. le?vperm $out3,$out3,$out3,$inpperm
  854. stvx_u $out2,$x20,$out
  855. vxor $out2,$in2,$rndkey0
  856. le?vperm $out4,$out4,$out4,$inpperm
  857. stvx_u $out3,$x30,$out
  858. vxor $out3,$in3,$rndkey0
  859. le?vperm $out5,$out5,$out5,$inpperm
  860. stvx_u $out4,$x40,$out
  861. vxor $out4,$in4,$rndkey0
  862. le?vperm $out6,$out6,$out6,$inpperm
  863. stvx_u $out5,$x50,$out
  864. vxor $out5,$in5,$rndkey0
  865. le?vperm $out7,$out7,$out7,$inpperm
  866. stvx_u $out6,$x60,$out
  867. vxor $out6,$in6,$rndkey0
  868. stvx_u $out7,$x70,$out
  869. addi $out,$out,0x80
  870. vxor $out7,$in7,$rndkey0
  871. mtctr $rounds
  872. beq Loop_cbc_dec8x # did $len-=128 borrow?
  873. addic. $len,$len,128
  874. beq Lcbc_dec8x_done
  875. nop
  876. nop
  877. Loop_cbc_dec8x_tail: # up to 7 "words" tail...
  878. vncipher $out1,$out1,v24
  879. vncipher $out2,$out2,v24
  880. vncipher $out3,$out3,v24
  881. vncipher $out4,$out4,v24
  882. vncipher $out5,$out5,v24
  883. vncipher $out6,$out6,v24
  884. vncipher $out7,$out7,v24
  885. lvx v24,$x20,$key_ # round[3]
  886. addi $key_,$key_,0x20
  887. vncipher $out1,$out1,v25
  888. vncipher $out2,$out2,v25
  889. vncipher $out3,$out3,v25
  890. vncipher $out4,$out4,v25
  891. vncipher $out5,$out5,v25
  892. vncipher $out6,$out6,v25
  893. vncipher $out7,$out7,v25
  894. lvx v25,$x10,$key_ # round[4]
  895. bdnz Loop_cbc_dec8x_tail
  896. vncipher $out1,$out1,v24
  897. vncipher $out2,$out2,v24
  898. vncipher $out3,$out3,v24
  899. vncipher $out4,$out4,v24
  900. vncipher $out5,$out5,v24
  901. vncipher $out6,$out6,v24
  902. vncipher $out7,$out7,v24
  903. vncipher $out1,$out1,v25
  904. vncipher $out2,$out2,v25
  905. vncipher $out3,$out3,v25
  906. vncipher $out4,$out4,v25
  907. vncipher $out5,$out5,v25
  908. vncipher $out6,$out6,v25
  909. vncipher $out7,$out7,v25
  910. vncipher $out1,$out1,v26
  911. vncipher $out2,$out2,v26
  912. vncipher $out3,$out3,v26
  913. vncipher $out4,$out4,v26
  914. vncipher $out5,$out5,v26
  915. vncipher $out6,$out6,v26
  916. vncipher $out7,$out7,v26
  917. vncipher $out1,$out1,v27
  918. vncipher $out2,$out2,v27
  919. vncipher $out3,$out3,v27
  920. vncipher $out4,$out4,v27
  921. vncipher $out5,$out5,v27
  922. vncipher $out6,$out6,v27
  923. vncipher $out7,$out7,v27
  924. vncipher $out1,$out1,v28
  925. vncipher $out2,$out2,v28
  926. vncipher $out3,$out3,v28
  927. vncipher $out4,$out4,v28
  928. vncipher $out5,$out5,v28
  929. vncipher $out6,$out6,v28
  930. vncipher $out7,$out7,v28
  931. vncipher $out1,$out1,v29
  932. vncipher $out2,$out2,v29
  933. vncipher $out3,$out3,v29
  934. vncipher $out4,$out4,v29
  935. vncipher $out5,$out5,v29
  936. vncipher $out6,$out6,v29
  937. vncipher $out7,$out7,v29
  938. vncipher $out1,$out1,v30
  939. vxor $ivec,$ivec,v31 # last round key
  940. vncipher $out2,$out2,v30
  941. vxor $in1,$in1,v31
  942. vncipher $out3,$out3,v30
  943. vxor $in2,$in2,v31
  944. vncipher $out4,$out4,v30
  945. vxor $in3,$in3,v31
  946. vncipher $out5,$out5,v30
  947. vxor $in4,$in4,v31
  948. vncipher $out6,$out6,v30
  949. vxor $in5,$in5,v31
  950. vncipher $out7,$out7,v30
  951. vxor $in6,$in6,v31
  952. cmplwi $len,32 # switch($len)
  953. blt Lcbc_dec8x_one
  954. nop
  955. beq Lcbc_dec8x_two
  956. cmplwi $len,64
  957. blt Lcbc_dec8x_three
  958. nop
  959. beq Lcbc_dec8x_four
  960. cmplwi $len,96
  961. blt Lcbc_dec8x_five
  962. nop
  963. beq Lcbc_dec8x_six
  964. Lcbc_dec8x_seven:
  965. vncipherlast $out1,$out1,$ivec
  966. vncipherlast $out2,$out2,$in1
  967. vncipherlast $out3,$out3,$in2
  968. vncipherlast $out4,$out4,$in3
  969. vncipherlast $out5,$out5,$in4
  970. vncipherlast $out6,$out6,$in5
  971. vncipherlast $out7,$out7,$in6
  972. vmr $ivec,$in7
  973. le?vperm $out1,$out1,$out1,$inpperm
  974. le?vperm $out2,$out2,$out2,$inpperm
  975. stvx_u $out1,$x00,$out
  976. le?vperm $out3,$out3,$out3,$inpperm
  977. stvx_u $out2,$x10,$out
  978. le?vperm $out4,$out4,$out4,$inpperm
  979. stvx_u $out3,$x20,$out
  980. le?vperm $out5,$out5,$out5,$inpperm
  981. stvx_u $out4,$x30,$out
  982. le?vperm $out6,$out6,$out6,$inpperm
  983. stvx_u $out5,$x40,$out
  984. le?vperm $out7,$out7,$out7,$inpperm
  985. stvx_u $out6,$x50,$out
  986. stvx_u $out7,$x60,$out
  987. addi $out,$out,0x70
  988. b Lcbc_dec8x_done
  989. .align 5
  990. Lcbc_dec8x_six:
  991. vncipherlast $out2,$out2,$ivec
  992. vncipherlast $out3,$out3,$in2
  993. vncipherlast $out4,$out4,$in3
  994. vncipherlast $out5,$out5,$in4
  995. vncipherlast $out6,$out6,$in5
  996. vncipherlast $out7,$out7,$in6
  997. vmr $ivec,$in7
  998. le?vperm $out2,$out2,$out2,$inpperm
  999. le?vperm $out3,$out3,$out3,$inpperm
  1000. stvx_u $out2,$x00,$out
  1001. le?vperm $out4,$out4,$out4,$inpperm
  1002. stvx_u $out3,$x10,$out
  1003. le?vperm $out5,$out5,$out5,$inpperm
  1004. stvx_u $out4,$x20,$out
  1005. le?vperm $out6,$out6,$out6,$inpperm
  1006. stvx_u $out5,$x30,$out
  1007. le?vperm $out7,$out7,$out7,$inpperm
  1008. stvx_u $out6,$x40,$out
  1009. stvx_u $out7,$x50,$out
  1010. addi $out,$out,0x60
  1011. b Lcbc_dec8x_done
  1012. .align 5
  1013. Lcbc_dec8x_five:
  1014. vncipherlast $out3,$out3,$ivec
  1015. vncipherlast $out4,$out4,$in3
  1016. vncipherlast $out5,$out5,$in4
  1017. vncipherlast $out6,$out6,$in5
  1018. vncipherlast $out7,$out7,$in6
  1019. vmr $ivec,$in7
  1020. le?vperm $out3,$out3,$out3,$inpperm
  1021. le?vperm $out4,$out4,$out4,$inpperm
  1022. stvx_u $out3,$x00,$out
  1023. le?vperm $out5,$out5,$out5,$inpperm
  1024. stvx_u $out4,$x10,$out
  1025. le?vperm $out6,$out6,$out6,$inpperm
  1026. stvx_u $out5,$x20,$out
  1027. le?vperm $out7,$out7,$out7,$inpperm
  1028. stvx_u $out6,$x30,$out
  1029. stvx_u $out7,$x40,$out
  1030. addi $out,$out,0x50
  1031. b Lcbc_dec8x_done
  1032. .align 5
  1033. Lcbc_dec8x_four:
  1034. vncipherlast $out4,$out4,$ivec
  1035. vncipherlast $out5,$out5,$in4
  1036. vncipherlast $out6,$out6,$in5
  1037. vncipherlast $out7,$out7,$in6
  1038. vmr $ivec,$in7
  1039. le?vperm $out4,$out4,$out4,$inpperm
  1040. le?vperm $out5,$out5,$out5,$inpperm
  1041. stvx_u $out4,$x00,$out
  1042. le?vperm $out6,$out6,$out6,$inpperm
  1043. stvx_u $out5,$x10,$out
  1044. le?vperm $out7,$out7,$out7,$inpperm
  1045. stvx_u $out6,$x20,$out
  1046. stvx_u $out7,$x30,$out
  1047. addi $out,$out,0x40
  1048. b Lcbc_dec8x_done
  1049. .align 5
  1050. Lcbc_dec8x_three:
  1051. vncipherlast $out5,$out5,$ivec
  1052. vncipherlast $out6,$out6,$in5
  1053. vncipherlast $out7,$out7,$in6
  1054. vmr $ivec,$in7
  1055. le?vperm $out5,$out5,$out5,$inpperm
  1056. le?vperm $out6,$out6,$out6,$inpperm
  1057. stvx_u $out5,$x00,$out
  1058. le?vperm $out7,$out7,$out7,$inpperm
  1059. stvx_u $out6,$x10,$out
  1060. stvx_u $out7,$x20,$out
  1061. addi $out,$out,0x30
  1062. b Lcbc_dec8x_done
  1063. .align 5
  1064. Lcbc_dec8x_two:
  1065. vncipherlast $out6,$out6,$ivec
  1066. vncipherlast $out7,$out7,$in6
  1067. vmr $ivec,$in7
  1068. le?vperm $out6,$out6,$out6,$inpperm
  1069. le?vperm $out7,$out7,$out7,$inpperm
  1070. stvx_u $out6,$x00,$out
  1071. stvx_u $out7,$x10,$out
  1072. addi $out,$out,0x20
  1073. b Lcbc_dec8x_done
  1074. .align 5
  1075. Lcbc_dec8x_one:
  1076. vncipherlast $out7,$out7,$ivec
  1077. vmr $ivec,$in7
  1078. le?vperm $out7,$out7,$out7,$inpperm
  1079. stvx_u $out7,0,$out
  1080. addi $out,$out,0x10
  1081. Lcbc_dec8x_done:
  1082. le?vperm $ivec,$ivec,$ivec,$inpperm
  1083. stvx_u $ivec,0,$ivp # write [unaligned] iv
  1084. li r10,`$FRAME+15`
  1085. li r11,`$FRAME+31`
  1086. stvx $inpperm,r10,$sp # wipe copies of round keys
  1087. addi r10,r10,32
  1088. stvx $inpperm,r11,$sp
  1089. addi r11,r11,32
  1090. stvx $inpperm,r10,$sp
  1091. addi r10,r10,32
  1092. stvx $inpperm,r11,$sp
  1093. addi r11,r11,32
  1094. stvx $inpperm,r10,$sp
  1095. addi r10,r10,32
  1096. stvx $inpperm,r11,$sp
  1097. addi r11,r11,32
  1098. stvx $inpperm,r10,$sp
  1099. addi r10,r10,32
  1100. stvx $inpperm,r11,$sp
  1101. addi r11,r11,32
  1102. mtspr 256,$vrsave
  1103. lvx v20,r10,$sp # ABI says so
  1104. addi r10,r10,32
  1105. lvx v21,r11,$sp
  1106. addi r11,r11,32
  1107. lvx v22,r10,$sp
  1108. addi r10,r10,32
  1109. lvx v23,r11,$sp
  1110. addi r11,r11,32
  1111. lvx v24,r10,$sp
  1112. addi r10,r10,32
  1113. lvx v25,r11,$sp
  1114. addi r11,r11,32
  1115. lvx v26,r10,$sp
  1116. addi r10,r10,32
  1117. lvx v27,r11,$sp
  1118. addi r11,r11,32
  1119. lvx v28,r10,$sp
  1120. addi r10,r10,32
  1121. lvx v29,r11,$sp
  1122. addi r11,r11,32
  1123. lvx v30,r10,$sp
  1124. lvx v31,r11,$sp
  1125. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1126. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1127. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1128. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1129. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1130. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1131. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  1132. blr
  1133. .long 0
  1134. .byte 0,12,0x04,0,0x80,6,6,0
  1135. .long 0
  1136. .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
  1137. ___
  1138. }} }}}
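# Hypothetical CBC usage from C (a sketch, assuming <openssl/aes.h> for
# AES_KEY). Only whole 16-byte blocks are processed, and the IV buffer is
# updated in place so chained calls continue the stream:
#
#	AES_KEY ks;
#	unsigned char iv[16];        /* per-message IV */
#	aes_p8_set_encrypt_key(user_key, 256, &ks);
#	aes_p8_cbc_encrypt(pt, ct, length, &ks, iv, 1);   /* enc != 0: encrypt */
#
# Decryption uses a schedule from aes_p8_set_decrypt_key and enc == 0.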
  1139. #########################################################################
  1140. {{{ # CTR procedure[s] #
  1141. my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
  1142. my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
  1143. my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
  1144. map("v$_",(4..11));
  1145. my $dat=$tmp;
  1146. $code.=<<___;
  1147. .globl .${prefix}_ctr32_encrypt_blocks
  1148. .align 5
  1149. .${prefix}_ctr32_encrypt_blocks:
  1150. ${UCMP}i $len,1
  1151. bltlr-
  1152. lis r0,0xfff0
  1153. mfspr $vrsave,256
  1154. mtspr 256,r0
  1155. li $idx,15
  1156. vxor $rndkey0,$rndkey0,$rndkey0
  1157. le?vspltisb $tmp,0x0f
  1158. lvx $ivec,0,$ivp # load [unaligned] iv
  1159. lvsl $inpperm,0,$ivp
  1160. lvx $inptail,$idx,$ivp
  1161. vspltisb $one,1
  1162. le?vxor $inpperm,$inpperm,$tmp
  1163. vperm $ivec,$ivec,$inptail,$inpperm
  1164. vsldoi $one,$rndkey0,$one,1
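# $one is built as the 128-bit value 1: vspltisb puts 0x01 in every byte and
# the vsldoi keeps just one of those bytes behind fifteen zero bytes, so the
# vadduwm in the loop below bumps only the rightmost 32-bit word of the
# counter block (hence "ctr32").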
  1165. neg r11,$inp
  1166. ?lvsl $keyperm,0,$key # prepare for unaligned key
  1167. lwz $rounds,240($key)
  1168. lvsr $inpperm,0,r11 # prepare for unaligned load
  1169. lvx $inptail,0,$inp
  1170. addi $inp,$inp,15 # 15 is not typo
  1171. le?vxor $inpperm,$inpperm,$tmp
  1172. srwi $rounds,$rounds,1
  1173. li $idx,16
  1174. subi $rounds,$rounds,1
  1175. ${UCMP}i $len,8
  1176. bge _aesp8_ctr32_encrypt8x
  1177. ?lvsr $outperm,0,$out # prepare for unaligned store
  1178. vspltisb $outmask,-1
  1179. lvx $outhead,0,$out
  1180. ?vperm $outmask,$rndkey0,$outmask,$outperm
  1181. le?vxor $outperm,$outperm,$tmp
  1182. lvx $rndkey0,0,$key
  1183. mtctr $rounds
  1184. lvx $rndkey1,$idx,$key
  1185. addi $idx,$idx,16
  1186. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1187. vxor $inout,$ivec,$rndkey0
  1188. lvx $rndkey0,$idx,$key
  1189. addi $idx,$idx,16
  1190. b Loop_ctr32_enc
  1191. .align 5
  1192. Loop_ctr32_enc:
  1193. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1194. vcipher $inout,$inout,$rndkey1
  1195. lvx $rndkey1,$idx,$key
  1196. addi $idx,$idx,16
  1197. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1198. vcipher $inout,$inout,$rndkey0
  1199. lvx $rndkey0,$idx,$key
  1200. addi $idx,$idx,16
  1201. bdnz Loop_ctr32_enc
  1202. vadduwm $ivec,$ivec,$one
  1203. vmr $dat,$inptail
  1204. lvx $inptail,0,$inp
  1205. addi $inp,$inp,16
  1206. subic. $len,$len,1 # blocks--
  1207. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1208. vcipher $inout,$inout,$rndkey1
  1209. lvx $rndkey1,$idx,$key
  1210. vperm $dat,$dat,$inptail,$inpperm
  1211. li $idx,16
  1212. ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
  1213. lvx $rndkey0,0,$key
  1214. vxor $dat,$dat,$rndkey1 # last round key
  1215. vcipherlast $inout,$inout,$dat
  1216. lvx $rndkey1,$idx,$key
  1217. addi $idx,$idx,16
  1218. vperm $inout,$inout,$inout,$outperm
  1219. vsel $dat,$outhead,$inout,$outmask
  1220. mtctr $rounds
  1221. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1222. vmr $outhead,$inout
  1223. vxor $inout,$ivec,$rndkey0
  1224. lvx $rndkey0,$idx,$key
  1225. addi $idx,$idx,16
  1226. stvx $dat,0,$out
  1227. addi $out,$out,16
  1228. bne Loop_ctr32_enc
  1229. addi $out,$out,-1
  1230. lvx $inout,0,$out # redundant in aligned case
  1231. vsel $inout,$outhead,$inout,$outmask
  1232. stvx $inout,0,$out
  1233. mtspr 256,$vrsave
  1234. blr
  1235. .long 0
  1236. .byte 0,12,0x14,0,0,0,6,0
  1237. .long 0
  1238. ___
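# Hypothetical CTR usage from C (a sketch, assuming <openssl/aes.h>): the
# length argument counts 16-byte blocks rather than bytes, and the low 32
# bits of the IV are treated as a big-endian block counter:
#
#	AES_KEY ks;
#	unsigned char ivec[16];      /* nonce || initial counter */
#	aes_p8_set_encrypt_key(user_key, 128, &ks);
#	aes_p8_ctr32_encrypt_blocks(in, out, num_blocks, &ks, ivec);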
  1239. #########################################################################
  1240. {{ # Optimized CTR procedure #
  1241. my $key_="r11";
  1242. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  1243. $x00=0 if ($flavour =~ /osx/);
  1244. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
  1245. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
  1246. my $rndkey0="v23"; # v24-v25 rotating buffer for the first round keys
  1247. # v26-v31 last 6 round keys
  1248. my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  1249. my ($two,$three,$four)=($outhead,$outperm,$outmask);
  1250. $code.=<<___;
  1251. .align 5
  1252. _aesp8_ctr32_encrypt8x:
  1253. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  1254. li r10,`$FRAME+8*16+15`
  1255. li r11,`$FRAME+8*16+31`
  1256. stvx v20,r10,$sp # ABI says so
  1257. addi r10,r10,32
  1258. stvx v21,r11,$sp
  1259. addi r11,r11,32
  1260. stvx v22,r10,$sp
  1261. addi r10,r10,32
  1262. stvx v23,r11,$sp
  1263. addi r11,r11,32
  1264. stvx v24,r10,$sp
  1265. addi r10,r10,32
  1266. stvx v25,r11,$sp
  1267. addi r11,r11,32
  1268. stvx v26,r10,$sp
  1269. addi r10,r10,32
  1270. stvx v27,r11,$sp
  1271. addi r11,r11,32
  1272. stvx v28,r10,$sp
  1273. addi r10,r10,32
  1274. stvx v29,r11,$sp
  1275. addi r11,r11,32
  1276. stvx v30,r10,$sp
  1277. stvx v31,r11,$sp
  1278. li r0,-1
  1279. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  1280. li $x10,0x10
  1281. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1282. li $x20,0x20
  1283. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1284. li $x30,0x30
  1285. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1286. li $x40,0x40
  1287. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1288. li $x50,0x50
  1289. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1290. li $x60,0x60
  1291. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1292. li $x70,0x70
  1293. mtspr 256,r0
  1294. subi $rounds,$rounds,3 # -4 in total
  1295. lvx $rndkey0,$x00,$key # load key schedule
  1296. lvx v30,$x10,$key
  1297. addi $key,$key,0x20
  1298. lvx v31,$x00,$key
  1299. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  1300. addi $key_,$sp,$FRAME+15
  1301. mtctr $rounds
  1302. Load_ctr32_enc_key:
  1303. ?vperm v24,v30,v31,$keyperm
  1304. lvx v30,$x10,$key
  1305. addi $key,$key,0x20
  1306. stvx v24,$x00,$key_ # off-load round[1]
  1307. ?vperm v25,v31,v30,$keyperm
  1308. lvx v31,$x00,$key
  1309. stvx v25,$x10,$key_ # off-load round[2]
  1310. addi $key_,$key_,0x20
  1311. bdnz Load_ctr32_enc_key
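# The loop above runs the key schedule through $keyperm once and parks the
# aligned round keys in the scratch area at $key_ on the stack, so the main
# loop can re-load them with plain lvx instead of re-permuting every pass.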
  1312. lvx v26,$x10,$key
  1313. ?vperm v24,v30,v31,$keyperm
  1314. lvx v27,$x20,$key
  1315. stvx v24,$x00,$key_ # off-load round[3]
  1316. ?vperm v25,v31,v26,$keyperm
  1317. lvx v28,$x30,$key
  1318. stvx v25,$x10,$key_ # off-load round[4]
  1319. addi $key_,$sp,$FRAME+15 # rewind $key_
  1320. ?vperm v26,v26,v27,$keyperm
  1321. lvx v29,$x40,$key
  1322. ?vperm v27,v27,v28,$keyperm
  1323. lvx v30,$x50,$key
  1324. ?vperm v28,v28,v29,$keyperm
  1325. lvx v31,$x60,$key
  1326. ?vperm v29,v29,v30,$keyperm
  1327. lvx $out0,$x70,$key # borrow $out0
  1328. ?vperm v30,v30,v31,$keyperm
  1329. lvx v24,$x00,$key_ # pre-load round[1]
  1330. ?vperm v31,v31,$out0,$keyperm
  1331. lvx v25,$x10,$key_ # pre-load round[2]
  1332. vadduwm $two,$one,$one
  1333. subi $inp,$inp,15 # undo "caller"
  1334. $SHL $len,$len,4
  1335. vadduwm $out1,$ivec,$one # counter values ...
  1336. vadduwm $out2,$ivec,$two
  1337. vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
  1338. le?li $idx,8
  1339. vadduwm $out3,$out1,$two
  1340. vxor $out1,$out1,$rndkey0
  1341. le?lvsl $inpperm,0,$idx
  1342. vadduwm $out4,$out2,$two
  1343. vxor $out2,$out2,$rndkey0
  1344. le?vspltisb $tmp,0x0f
  1345. vadduwm $out5,$out3,$two
  1346. vxor $out3,$out3,$rndkey0
  1347. le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
  1348. vadduwm $out6,$out4,$two
  1349. vxor $out4,$out4,$rndkey0
  1350. vadduwm $out7,$out5,$two
  1351. vxor $out5,$out5,$rndkey0
  1352. vadduwm $ivec,$out6,$two # next counter value
  1353. vxor $out6,$out6,$rndkey0
  1354. vxor $out7,$out7,$rndkey0
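# At this point $out0-$out7 hold eight consecutive counter blocks already
# xored with round key 0, and $ivec carries the counter for the next batch.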
  1355. mtctr $rounds
  1356. b Loop_ctr32_enc8x
  1357. .align 5
  1358. Loop_ctr32_enc8x:
  1359. vcipher $out0,$out0,v24
  1360. vcipher $out1,$out1,v24
  1361. vcipher $out2,$out2,v24
  1362. vcipher $out3,$out3,v24
  1363. vcipher $out4,$out4,v24
  1364. vcipher $out5,$out5,v24
  1365. vcipher $out6,$out6,v24
  1366. vcipher $out7,$out7,v24
  1367. Loop_ctr32_enc8x_middle:
  1368. lvx v24,$x20,$key_ # round[3]
  1369. addi $key_,$key_,0x20
  1370. vcipher $out0,$out0,v25
  1371. vcipher $out1,$out1,v25
  1372. vcipher $out2,$out2,v25
  1373. vcipher $out3,$out3,v25
  1374. vcipher $out4,$out4,v25
  1375. vcipher $out5,$out5,v25
  1376. vcipher $out6,$out6,v25
  1377. vcipher $out7,$out7,v25
  1378. lvx v25,$x10,$key_ # round[4]
  1379. bdnz Loop_ctr32_enc8x
  1380. subic r11,$len,256 # $len-256, borrow $key_
  1381. vcipher $out0,$out0,v24
  1382. vcipher $out1,$out1,v24
  1383. vcipher $out2,$out2,v24
  1384. vcipher $out3,$out3,v24
  1385. vcipher $out4,$out4,v24
  1386. vcipher $out5,$out5,v24
  1387. vcipher $out6,$out6,v24
  1388. vcipher $out7,$out7,v24
  1389. subfe r0,r0,r0 # borrow?-1:0
  1390. vcipher $out0,$out0,v25
  1391. vcipher $out1,$out1,v25
  1392. vcipher $out2,$out2,v25
  1393. vcipher $out3,$out3,v25
  1394. vcipher $out4,$out4,v25
  1395. vcipher $out5,$out5,v25
  1396. vcipher $out6,$out6,v25
  1397. vcipher $out7,$out7,v25
  1398. and r0,r0,r11
  1399. addi $key_,$sp,$FRAME+15 # rewind $key_
  1400. vcipher $out0,$out0,v26
  1401. vcipher $out1,$out1,v26
  1402. vcipher $out2,$out2,v26
  1403. vcipher $out3,$out3,v26
  1404. vcipher $out4,$out4,v26
  1405. vcipher $out5,$out5,v26
  1406. vcipher $out6,$out6,v26
  1407. vcipher $out7,$out7,v26
  1408. lvx v24,$x00,$key_ # re-pre-load round[1]
  1409. subic $len,$len,129 # $len-=129
  1410. vcipher $out0,$out0,v27
  1411. addi $len,$len,1 # $len-=128 really
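# Subtracting 129 rather than 128 makes the carry record whether any input
# remains beyond the current eight blocks; the addi above restores the net
# $len-=128.  The subfe./bne pair further down turns that carry into the
# decision to stay in the loop or fall into Lctr32_enc8x_break.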
  1412. vcipher $out1,$out1,v27
  1413. vcipher $out2,$out2,v27
  1414. vcipher $out3,$out3,v27
  1415. vcipher $out4,$out4,v27
  1416. vcipher $out5,$out5,v27
  1417. vcipher $out6,$out6,v27
  1418. vcipher $out7,$out7,v27
  1419. lvx v25,$x10,$key_ # re-pre-load round[2]
  1420. vcipher $out0,$out0,v28
  1421. lvx_u $in0,$x00,$inp # load input
  1422. vcipher $out1,$out1,v28
  1423. lvx_u $in1,$x10,$inp
  1424. vcipher $out2,$out2,v28
  1425. lvx_u $in2,$x20,$inp
  1426. vcipher $out3,$out3,v28
  1427. lvx_u $in3,$x30,$inp
  1428. vcipher $out4,$out4,v28
  1429. lvx_u $in4,$x40,$inp
  1430. vcipher $out5,$out5,v28
  1431. lvx_u $in5,$x50,$inp
  1432. vcipher $out6,$out6,v28
  1433. lvx_u $in6,$x60,$inp
  1434. vcipher $out7,$out7,v28
  1435. lvx_u $in7,$x70,$inp
  1436. addi $inp,$inp,0x80
  1437. vcipher $out0,$out0,v29
  1438. le?vperm $in0,$in0,$in0,$inpperm
  1439. vcipher $out1,$out1,v29
  1440. le?vperm $in1,$in1,$in1,$inpperm
  1441. vcipher $out2,$out2,v29
  1442. le?vperm $in2,$in2,$in2,$inpperm
  1443. vcipher $out3,$out3,v29
  1444. le?vperm $in3,$in3,$in3,$inpperm
  1445. vcipher $out4,$out4,v29
  1446. le?vperm $in4,$in4,$in4,$inpperm
  1447. vcipher $out5,$out5,v29
  1448. le?vperm $in5,$in5,$in5,$inpperm
  1449. vcipher $out6,$out6,v29
  1450. le?vperm $in6,$in6,$in6,$inpperm
  1451. vcipher $out7,$out7,v29
  1452. le?vperm $in7,$in7,$in7,$inpperm
  1453. add $inp,$inp,r0 # $inp is adjusted in such
  1454. # way that at exit from the
  1455. # loop inX-in7 are loaded
  1456. # with last "words"
  1457. subfe. r0,r0,r0 # borrow?-1:0
  1458. vcipher $out0,$out0,v30
  1459. vxor $in0,$in0,v31 # xor with last round key
  1460. vcipher $out1,$out1,v30
  1461. vxor $in1,$in1,v31
  1462. vcipher $out2,$out2,v30
  1463. vxor $in2,$in2,v31
  1464. vcipher $out3,$out3,v30
  1465. vxor $in3,$in3,v31
  1466. vcipher $out4,$out4,v30
  1467. vxor $in4,$in4,v31
  1468. vcipher $out5,$out5,v30
  1469. vxor $in5,$in5,v31
  1470. vcipher $out6,$out6,v30
  1471. vxor $in6,$in6,v31
  1472. vcipher $out7,$out7,v30
  1473. vxor $in7,$in7,v31
  1474. bne Lctr32_enc8x_break # did $len-129 borrow?
  1475. vcipherlast $in0,$out0,$in0
  1476. vcipherlast $in1,$out1,$in1
  1477. vadduwm $out1,$ivec,$one # counter values ...
  1478. vcipherlast $in2,$out2,$in2
  1479. vadduwm $out2,$ivec,$two
  1480. vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
  1481. vcipherlast $in3,$out3,$in3
  1482. vadduwm $out3,$out1,$two
  1483. vxor $out1,$out1,$rndkey0
  1484. vcipherlast $in4,$out4,$in4
  1485. vadduwm $out4,$out2,$two
  1486. vxor $out2,$out2,$rndkey0
  1487. vcipherlast $in5,$out5,$in5
  1488. vadduwm $out5,$out3,$two
  1489. vxor $out3,$out3,$rndkey0
  1490. vcipherlast $in6,$out6,$in6
  1491. vadduwm $out6,$out4,$two
  1492. vxor $out4,$out4,$rndkey0
  1493. vcipherlast $in7,$out7,$in7
  1494. vadduwm $out7,$out5,$two
  1495. vxor $out5,$out5,$rndkey0
  1496. le?vperm $in0,$in0,$in0,$inpperm
  1497. vadduwm $ivec,$out6,$two # next counter value
  1498. vxor $out6,$out6,$rndkey0
  1499. le?vperm $in1,$in1,$in1,$inpperm
  1500. vxor $out7,$out7,$rndkey0
  1501. mtctr $rounds
  1502. vcipher $out0,$out0,v24
  1503. stvx_u $in0,$x00,$out
  1504. le?vperm $in2,$in2,$in2,$inpperm
  1505. vcipher $out1,$out1,v24
  1506. stvx_u $in1,$x10,$out
  1507. le?vperm $in3,$in3,$in3,$inpperm
  1508. vcipher $out2,$out2,v24
  1509. stvx_u $in2,$x20,$out
  1510. le?vperm $in4,$in4,$in4,$inpperm
  1511. vcipher $out3,$out3,v24
  1512. stvx_u $in3,$x30,$out
  1513. le?vperm $in5,$in5,$in5,$inpperm
  1514. vcipher $out4,$out4,v24
  1515. stvx_u $in4,$x40,$out
  1516. le?vperm $in6,$in6,$in6,$inpperm
  1517. vcipher $out5,$out5,v24
  1518. stvx_u $in5,$x50,$out
  1519. le?vperm $in7,$in7,$in7,$inpperm
  1520. vcipher $out6,$out6,v24
  1521. stvx_u $in6,$x60,$out
  1522. vcipher $out7,$out7,v24
  1523. stvx_u $in7,$x70,$out
  1524. addi $out,$out,0x80
  1525. b Loop_ctr32_enc8x_middle
  1526. .align 5
  1527. Lctr32_enc8x_break:
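# Here $len has gone non-positive: it equals (remaining blocks - 8)*16, so
# -0x70 means a single block is left, -0x60 two, and so on up to 0 for a
# full batch of eight.  The compare/branch ladder picks the matching tail.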
  1528. cmpwi $len,-0x60
  1529. blt Lctr32_enc8x_one
  1530. nop
  1531. beq Lctr32_enc8x_two
  1532. cmpwi $len,-0x40
  1533. blt Lctr32_enc8x_three
  1534. nop
  1535. beq Lctr32_enc8x_four
  1536. cmpwi $len,-0x20
  1537. blt Lctr32_enc8x_five
  1538. nop
  1539. beq Lctr32_enc8x_six
  1540. cmpwi $len,0x00
  1541. blt Lctr32_enc8x_seven
  1542. Lctr32_enc8x_eight:
  1543. vcipherlast $out0,$out0,$in0
  1544. vcipherlast $out1,$out1,$in1
  1545. vcipherlast $out2,$out2,$in2
  1546. vcipherlast $out3,$out3,$in3
  1547. vcipherlast $out4,$out4,$in4
  1548. vcipherlast $out5,$out5,$in5
  1549. vcipherlast $out6,$out6,$in6
  1550. vcipherlast $out7,$out7,$in7
  1551. le?vperm $out0,$out0,$out0,$inpperm
  1552. le?vperm $out1,$out1,$out1,$inpperm
  1553. stvx_u $out0,$x00,$out
  1554. le?vperm $out2,$out2,$out2,$inpperm
  1555. stvx_u $out1,$x10,$out
  1556. le?vperm $out3,$out3,$out3,$inpperm
  1557. stvx_u $out2,$x20,$out
  1558. le?vperm $out4,$out4,$out4,$inpperm
  1559. stvx_u $out3,$x30,$out
  1560. le?vperm $out5,$out5,$out5,$inpperm
  1561. stvx_u $out4,$x40,$out
  1562. le?vperm $out6,$out6,$out6,$inpperm
  1563. stvx_u $out5,$x50,$out
  1564. le?vperm $out7,$out7,$out7,$inpperm
  1565. stvx_u $out6,$x60,$out
  1566. stvx_u $out7,$x70,$out
  1567. addi $out,$out,0x80
  1568. b Lctr32_enc8x_done
  1569. .align 5
  1570. Lctr32_enc8x_seven:
  1571. vcipherlast $out0,$out0,$in1
  1572. vcipherlast $out1,$out1,$in2
  1573. vcipherlast $out2,$out2,$in3
  1574. vcipherlast $out3,$out3,$in4
  1575. vcipherlast $out4,$out4,$in5
  1576. vcipherlast $out5,$out5,$in6
  1577. vcipherlast $out6,$out6,$in7
  1578. le?vperm $out0,$out0,$out0,$inpperm
  1579. le?vperm $out1,$out1,$out1,$inpperm
  1580. stvx_u $out0,$x00,$out
  1581. le?vperm $out2,$out2,$out2,$inpperm
  1582. stvx_u $out1,$x10,$out
  1583. le?vperm $out3,$out3,$out3,$inpperm
  1584. stvx_u $out2,$x20,$out
  1585. le?vperm $out4,$out4,$out4,$inpperm
  1586. stvx_u $out3,$x30,$out
  1587. le?vperm $out5,$out5,$out5,$inpperm
  1588. stvx_u $out4,$x40,$out
  1589. le?vperm $out6,$out6,$out6,$inpperm
  1590. stvx_u $out5,$x50,$out
  1591. stvx_u $out6,$x60,$out
  1592. addi $out,$out,0x70
  1593. b Lctr32_enc8x_done
  1594. .align 5
  1595. Lctr32_enc8x_six:
  1596. vcipherlast $out0,$out0,$in2
  1597. vcipherlast $out1,$out1,$in3
  1598. vcipherlast $out2,$out2,$in4
  1599. vcipherlast $out3,$out3,$in5
  1600. vcipherlast $out4,$out4,$in6
  1601. vcipherlast $out5,$out5,$in7
  1602. le?vperm $out0,$out0,$out0,$inpperm
  1603. le?vperm $out1,$out1,$out1,$inpperm
  1604. stvx_u $out0,$x00,$out
  1605. le?vperm $out2,$out2,$out2,$inpperm
  1606. stvx_u $out1,$x10,$out
  1607. le?vperm $out3,$out3,$out3,$inpperm
  1608. stvx_u $out2,$x20,$out
  1609. le?vperm $out4,$out4,$out4,$inpperm
  1610. stvx_u $out3,$x30,$out
  1611. le?vperm $out5,$out5,$out5,$inpperm
  1612. stvx_u $out4,$x40,$out
  1613. stvx_u $out5,$x50,$out
  1614. addi $out,$out,0x60
  1615. b Lctr32_enc8x_done
  1616. .align 5
  1617. Lctr32_enc8x_five:
  1618. vcipherlast $out0,$out0,$in3
  1619. vcipherlast $out1,$out1,$in4
  1620. vcipherlast $out2,$out2,$in5
  1621. vcipherlast $out3,$out3,$in6
  1622. vcipherlast $out4,$out4,$in7
  1623. le?vperm $out0,$out0,$out0,$inpperm
  1624. le?vperm $out1,$out1,$out1,$inpperm
  1625. stvx_u $out0,$x00,$out
  1626. le?vperm $out2,$out2,$out2,$inpperm
  1627. stvx_u $out1,$x10,$out
  1628. le?vperm $out3,$out3,$out3,$inpperm
  1629. stvx_u $out2,$x20,$out
  1630. le?vperm $out4,$out4,$out4,$inpperm
  1631. stvx_u $out3,$x30,$out
  1632. stvx_u $out4,$x40,$out
  1633. addi $out,$out,0x50
  1634. b Lctr32_enc8x_done
  1635. .align 5
  1636. Lctr32_enc8x_four:
  1637. vcipherlast $out0,$out0,$in4
  1638. vcipherlast $out1,$out1,$in5
  1639. vcipherlast $out2,$out2,$in6
  1640. vcipherlast $out3,$out3,$in7
  1641. le?vperm $out0,$out0,$out0,$inpperm
  1642. le?vperm $out1,$out1,$out1,$inpperm
  1643. stvx_u $out0,$x00,$out
  1644. le?vperm $out2,$out2,$out2,$inpperm
  1645. stvx_u $out1,$x10,$out
  1646. le?vperm $out3,$out3,$out3,$inpperm
  1647. stvx_u $out2,$x20,$out
  1648. stvx_u $out3,$x30,$out
  1649. addi $out,$out,0x40
  1650. b Lctr32_enc8x_done
  1651. .align 5
  1652. Lctr32_enc8x_three:
  1653. vcipherlast $out0,$out0,$in5
  1654. vcipherlast $out1,$out1,$in6
  1655. vcipherlast $out2,$out2,$in7
  1656. le?vperm $out0,$out0,$out0,$inpperm
  1657. le?vperm $out1,$out1,$out1,$inpperm
  1658. stvx_u $out0,$x00,$out
  1659. le?vperm $out2,$out2,$out2,$inpperm
  1660. stvx_u $out1,$x10,$out
  1661. stvx_u $out2,$x20,$out
  1662. addi $out,$out,0x30
  1663. b Lctr32_enc8x_done
  1664. .align 5
  1665. Lctr32_enc8x_two:
  1666. vcipherlast $out0,$out0,$in6
  1667. vcipherlast $out1,$out1,$in7
  1668. le?vperm $out0,$out0,$out0,$inpperm
  1669. le?vperm $out1,$out1,$out1,$inpperm
  1670. stvx_u $out0,$x00,$out
  1671. stvx_u $out1,$x10,$out
  1672. addi $out,$out,0x20
  1673. b Lctr32_enc8x_done
  1674. .align 5
  1675. Lctr32_enc8x_one:
  1676. vcipherlast $out0,$out0,$in7
  1677. le?vperm $out0,$out0,$out0,$inpperm
  1678. stvx_u $out0,0,$out
  1679. addi $out,$out,0x10
  1680. Lctr32_enc8x_done:
  1681. li r10,`$FRAME+15`
  1682. li r11,`$FRAME+31`
  1683. stvx $inpperm,r10,$sp # wipe copies of round keys
  1684. addi r10,r10,32
  1685. stvx $inpperm,r11,$sp
  1686. addi r11,r11,32
  1687. stvx $inpperm,r10,$sp
  1688. addi r10,r10,32
  1689. stvx $inpperm,r11,$sp
  1690. addi r11,r11,32
  1691. stvx $inpperm,r10,$sp
  1692. addi r10,r10,32
  1693. stvx $inpperm,r11,$sp
  1694. addi r11,r11,32
  1695. stvx $inpperm,r10,$sp
  1696. addi r10,r10,32
  1697. stvx $inpperm,r11,$sp
  1698. addi r11,r11,32
  1699. mtspr 256,$vrsave
  1700. lvx v20,r10,$sp # ABI says so
  1701. addi r10,r10,32
  1702. lvx v21,r11,$sp
  1703. addi r11,r11,32
  1704. lvx v22,r10,$sp
  1705. addi r10,r10,32
  1706. lvx v23,r11,$sp
  1707. addi r11,r11,32
  1708. lvx v24,r10,$sp
  1709. addi r10,r10,32
  1710. lvx v25,r11,$sp
  1711. addi r11,r11,32
  1712. lvx v26,r10,$sp
  1713. addi r10,r10,32
  1714. lvx v27,r11,$sp
  1715. addi r11,r11,32
  1716. lvx v28,r10,$sp
  1717. addi r10,r10,32
  1718. lvx v29,r11,$sp
  1719. addi r11,r11,32
  1720. lvx v30,r10,$sp
  1721. lvx v31,r11,$sp
  1722. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1723. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1724. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1725. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1726. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1727. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1728. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  1729. blr
  1730. .long 0
  1731. .byte 0,12,0x04,0,0x80,6,6,0
  1732. .long 0
  1733. .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
  1734. ___
  1735. }} }}}
  1736. #########################################################################
  1737. {{{ # XTS procedures #
  1738. # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
  1739. # const AES_KEY *key1, const AES_KEY *key2, #
  1740. # [const] unsigned char iv[16]); #
  1741. # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
  1742. # input tweak value is assumed to be encrypted already, and last tweak #
  1743. # value, one suitable for consecutive call on same chunk of data, is #
  1744. # written back to original buffer. In addition, in "tweak chaining" #
  1745. # mode only complete input blocks are processed. #
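# A caller-side sketch (C-like pseudocode; argument names are taken from the
# prototype above rather than from any particular caller):
#
#     aes_p8_xts_encrypt(inp, out, len, key1, key2, iv);  /* ordinary XTS  */
#     aes_p8_xts_encrypt(inp, out, len, key1, NULL, iv);  /* tweak chaining:
#                       iv already holds the encrypted tweak, the next tweak
#                       is written back to it, and only complete blocks are
#                       processed, so a long stream can be handled across
#                       consecutive calls                                   */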
  1746. my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
  1747. my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
  1748. my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
  1749. my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
  1750. my $taillen = $key2;
  1751. ($inp,$idx) = ($idx,$inp); # reassign
  1752. $code.=<<___;
  1753. .globl .${prefix}_xts_encrypt
  1754. .align 5
  1755. .${prefix}_xts_encrypt:
  1756. mr $inp,r3 # reassign
  1757. li r3,-1
  1758. ${UCMP}i $len,16
  1759. bltlr-
  1760. lis r0,0xfff0
  1761. mfspr r12,256 # save vrsave
  1762. li r11,0
  1763. mtspr 256,r0
  1764. vspltisb $seven,0x07 # 0x070707..07
  1765. le?lvsl $leperm,r11,r11
  1766. le?vspltisb $tmp,0x0f
  1767. le?vxor $leperm,$leperm,$seven
  1768. li $idx,15
  1769. lvx $tweak,0,$ivp # load [unaligned] iv
  1770. lvsl $inpperm,0,$ivp
  1771. lvx $inptail,$idx,$ivp
  1772. le?vxor $inpperm,$inpperm,$tmp
  1773. vperm $tweak,$tweak,$inptail,$inpperm
  1774. neg r11,$inp
  1775. lvsr $inpperm,0,r11 # prepare for unaligned load
  1776. lvx $inout,0,$inp
  1777. addi $inp,$inp,15 # 15 is not typo
  1778. le?vxor $inpperm,$inpperm,$tmp
  1779. ${UCMP}i $key2,0 # key2==NULL?
  1780. beq Lxts_enc_no_key2
  1781. ?lvsl $keyperm,0,$key2 # prepare for unaligned key
  1782. lwz $rounds,240($key2)
  1783. srwi $rounds,$rounds,1
  1784. subi $rounds,$rounds,1
  1785. li $idx,16
  1786. lvx $rndkey0,0,$key2
  1787. lvx $rndkey1,$idx,$key2
  1788. addi $idx,$idx,16
  1789. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1790. vxor $tweak,$tweak,$rndkey0
  1791. lvx $rndkey0,$idx,$key2
  1792. addi $idx,$idx,16
  1793. mtctr $rounds
  1794. Ltweak_xts_enc:
  1795. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1796. vcipher $tweak,$tweak,$rndkey1
  1797. lvx $rndkey1,$idx,$key2
  1798. addi $idx,$idx,16
  1799. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1800. vcipher $tweak,$tweak,$rndkey0
  1801. lvx $rndkey0,$idx,$key2
  1802. addi $idx,$idx,16
  1803. bdnz Ltweak_xts_enc
  1804. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1805. vcipher $tweak,$tweak,$rndkey1
  1806. lvx $rndkey1,$idx,$key2
  1807. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1808. vcipherlast $tweak,$tweak,$rndkey0
  1809. li $ivp,0 # don't chain the tweak
  1810. b Lxts_enc
  1811. Lxts_enc_no_key2:
  1812. li $idx,-16
  1813. and $len,$len,$idx # in "tweak chaining"
  1814. # mode only complete
  1815. # blocks are processed
  1816. Lxts_enc:
  1817. lvx $inptail,0,$inp
  1818. addi $inp,$inp,16
  1819. ?lvsl $keyperm,0,$key1 # prepare for unaligned key
  1820. lwz $rounds,240($key1)
  1821. srwi $rounds,$rounds,1
  1822. subi $rounds,$rounds,1
  1823. li $idx,16
  1824. vslb $eighty7,$seven,$seven # 0x808080..80
  1825. vor $eighty7,$eighty7,$seven # 0x878787..87
  1826. vspltisb $tmp,1 # 0x010101..01
  1827. vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
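# $eighty7 is the XTS reduction pattern.  Each per-block tweak update below
# doubles the tweak in GF(2^128): vaddubm shifts every byte left by one,
# while the vsrab/vsldoi/vand/vxor sequence re-injects the inter-byte
# carries and folds the bit shifted out of the top back in as 0x87, i.e.
# reduction by x^128 + x^7 + x^2 + x + 1.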
  1828. ${UCMP}i $len,96
  1829. bge _aesp8_xts_encrypt6x
  1830. andi. $taillen,$len,15
  1831. subic r0,$len,32
  1832. subi $taillen,$taillen,16
  1833. subfe r0,r0,r0
  1834. and r0,r0,$taillen
  1835. add $inp,$inp,r0
  1836. lvx $rndkey0,0,$key1
  1837. lvx $rndkey1,$idx,$key1
  1838. addi $idx,$idx,16
  1839. vperm $inout,$inout,$inptail,$inpperm
  1840. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1841. vxor $inout,$inout,$tweak
  1842. vxor $inout,$inout,$rndkey0
  1843. lvx $rndkey0,$idx,$key1
  1844. addi $idx,$idx,16
  1845. mtctr $rounds
  1846. b Loop_xts_enc
  1847. .align 5
  1848. Loop_xts_enc:
  1849. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1850. vcipher $inout,$inout,$rndkey1
  1851. lvx $rndkey1,$idx,$key1
  1852. addi $idx,$idx,16
  1853. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1854. vcipher $inout,$inout,$rndkey0
  1855. lvx $rndkey0,$idx,$key1
  1856. addi $idx,$idx,16
  1857. bdnz Loop_xts_enc
  1858. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1859. vcipher $inout,$inout,$rndkey1
  1860. lvx $rndkey1,$idx,$key1
  1861. li $idx,16
  1862. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1863. vxor $rndkey0,$rndkey0,$tweak
  1864. vcipherlast $output,$inout,$rndkey0
  1865. le?vperm $tmp,$output,$output,$leperm
  1866. be?nop
  1867. le?stvx_u $tmp,0,$out
  1868. be?stvx_u $output,0,$out
  1869. addi $out,$out,16
  1870. subic. $len,$len,16
  1871. beq Lxts_enc_done
  1872. vmr $inout,$inptail
  1873. lvx $inptail,0,$inp
  1874. addi $inp,$inp,16
  1875. lvx $rndkey0,0,$key1
  1876. lvx $rndkey1,$idx,$key1
  1877. addi $idx,$idx,16
  1878. subic r0,$len,32
  1879. subfe r0,r0,r0
  1880. and r0,r0,$taillen
  1881. add $inp,$inp,r0
  1882. vsrab $tmp,$tweak,$seven # next tweak value
  1883. vaddubm $tweak,$tweak,$tweak
  1884. vsldoi $tmp,$tmp,$tmp,15
  1885. vand $tmp,$tmp,$eighty7
  1886. vxor $tweak,$tweak,$tmp
  1887. vperm $inout,$inout,$inptail,$inpperm
  1888. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1889. vxor $inout,$inout,$tweak
  1890. vxor $output,$output,$rndkey0 # just in case $len<16
  1891. vxor $inout,$inout,$rndkey0
  1892. lvx $rndkey0,$idx,$key1
  1893. addi $idx,$idx,16
  1894. mtctr $rounds
  1895. ${UCMP}i $len,16
  1896. bge Loop_xts_enc
  1897. vxor $output,$output,$tweak
  1898. lvsr $inpperm,0,$len # $inpperm is no longer needed
  1899. vxor $inptail,$inptail,$inptail # $inptail is no longer needed
  1900. vspltisb $tmp,-1
  1901. vperm $inptail,$inptail,$tmp,$inpperm
  1902. vsel $inout,$inout,$output,$inptail
  1903. subi r11,$out,17
  1904. subi $out,$out,16
  1905. mtctr $len
  1906. li $len,16
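# Ciphertext stealing for a ragged tail: vsel above splices the partial last
# block (already tweak/key-masked) together with the previous ciphertext
# block, the byte-copy loop below moves the leading bytes of that ciphertext
# forward to become the short final output, and the spliced block is pushed
# through Loop_xts_enc once more, overwriting the previous ciphertext block.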
  1907. Loop_xts_enc_steal:
  1908. lbzu r0,1(r11)
  1909. stb r0,16(r11)
  1910. bdnz Loop_xts_enc_steal
  1911. mtctr $rounds
  1912. b Loop_xts_enc # one more time...
  1913. Lxts_enc_done:
  1914. ${UCMP}i $ivp,0
  1915. beq Lxts_enc_ret
  1916. vsrab $tmp,$tweak,$seven # next tweak value
  1917. vaddubm $tweak,$tweak,$tweak
  1918. vsldoi $tmp,$tmp,$tmp,15
  1919. vand $tmp,$tmp,$eighty7
  1920. vxor $tweak,$tweak,$tmp
  1921. le?vperm $tweak,$tweak,$tweak,$leperm
  1922. stvx_u $tweak,0,$ivp
  1923. Lxts_enc_ret:
  1924. mtspr 256,r12 # restore vrsave
  1925. li r3,0
  1926. blr
  1927. .long 0
  1928. .byte 0,12,0x04,0,0x80,6,6,0
  1929. .long 0
  1930. .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
  1931. .globl .${prefix}_xts_decrypt
  1932. .align 5
  1933. .${prefix}_xts_decrypt:
  1934. mr $inp,r3 # reassign
  1935. li r3,-1
  1936. ${UCMP}i $len,16
  1937. bltlr-
  1938. lis r0,0xfff8
  1939. mfspr r12,256 # save vrsave
  1940. li r11,0
  1941. mtspr 256,r0
  1942. andi. r0,$len,15
  1943. neg r0,r0
  1944. andi. r0,r0,16
  1945. sub $len,$len,r0
  1946. vspltisb $seven,0x07 # 0x070707..07
  1947. le?lvsl $leperm,r11,r11
  1948. le?vspltisb $tmp,0x0f
  1949. le?vxor $leperm,$leperm,$seven
  1950. li $idx,15
  1951. lvx $tweak,0,$ivp # load [unaligned] iv
  1952. lvsl $inpperm,0,$ivp
  1953. lvx $inptail,$idx,$ivp
  1954. le?vxor $inpperm,$inpperm,$tmp
  1955. vperm $tweak,$tweak,$inptail,$inpperm
  1956. neg r11,$inp
  1957. lvsr $inpperm,0,r11 # prepare for unaligned load
  1958. lvx $inout,0,$inp
  1959. addi $inp,$inp,15 # 15 is not typo
  1960. le?vxor $inpperm,$inpperm,$tmp
  1961. ${UCMP}i $key2,0 # key2==NULL?
  1962. beq Lxts_dec_no_key2
  1963. ?lvsl $keyperm,0,$key2 # prepare for unaligned key
  1964. lwz $rounds,240($key2)
  1965. srwi $rounds,$rounds,1
  1966. subi $rounds,$rounds,1
  1967. li $idx,16
  1968. lvx $rndkey0,0,$key2
  1969. lvx $rndkey1,$idx,$key2
  1970. addi $idx,$idx,16
  1971. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1972. vxor $tweak,$tweak,$rndkey0
  1973. lvx $rndkey0,$idx,$key2
  1974. addi $idx,$idx,16
  1975. mtctr $rounds
  1976. Ltweak_xts_dec:
  1977. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1978. vcipher $tweak,$tweak,$rndkey1
  1979. lvx $rndkey1,$idx,$key2
  1980. addi $idx,$idx,16
  1981. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1982. vcipher $tweak,$tweak,$rndkey0
  1983. lvx $rndkey0,$idx,$key2
  1984. addi $idx,$idx,16
  1985. bdnz Ltweak_xts_dec
  1986. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1987. vcipher $tweak,$tweak,$rndkey1
  1988. lvx $rndkey1,$idx,$key2
  1989. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1990. vcipherlast $tweak,$tweak,$rndkey0
  1991. li $ivp,0 # don't chain the tweak
  1992. b Lxts_dec
  1993. Lxts_dec_no_key2:
  1994. neg $idx,$len
  1995. andi. $idx,$idx,15
  1996. add $len,$len,$idx # in "tweak chaining"
  1997. # mode only complete
  1998. # blocks are processed
  1999. Lxts_dec:
  2000. lvx $inptail,0,$inp
  2001. addi $inp,$inp,16
  2002. ?lvsl $keyperm,0,$key1 # prepare for unaligned key
  2003. lwz $rounds,240($key1)
  2004. srwi $rounds,$rounds,1
  2005. subi $rounds,$rounds,1
  2006. li $idx,16
  2007. vslb $eighty7,$seven,$seven # 0x808080..80
  2008. vor $eighty7,$eighty7,$seven # 0x878787..87
  2009. vspltisb $tmp,1 # 0x010101..01
  2010. vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
  2011. ${UCMP}i $len,96
  2012. bge _aesp8_xts_decrypt6x
  2013. lvx $rndkey0,0,$key1
  2014. lvx $rndkey1,$idx,$key1
  2015. addi $idx,$idx,16
  2016. vperm $inout,$inout,$inptail,$inpperm
  2017. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2018. vxor $inout,$inout,$tweak
  2019. vxor $inout,$inout,$rndkey0
  2020. lvx $rndkey0,$idx,$key1
  2021. addi $idx,$idx,16
  2022. mtctr $rounds
  2023. ${UCMP}i $len,16
  2024. blt Ltail_xts_dec
  2025. be?b Loop_xts_dec
  2026. .align 5
  2027. Loop_xts_dec:
  2028. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2029. vncipher $inout,$inout,$rndkey1
  2030. lvx $rndkey1,$idx,$key1
  2031. addi $idx,$idx,16
  2032. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2033. vncipher $inout,$inout,$rndkey0
  2034. lvx $rndkey0,$idx,$key1
  2035. addi $idx,$idx,16
  2036. bdnz Loop_xts_dec
  2037. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2038. vncipher $inout,$inout,$rndkey1
  2039. lvx $rndkey1,$idx,$key1
  2040. li $idx,16
  2041. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2042. vxor $rndkey0,$rndkey0,$tweak
  2043. vncipherlast $output,$inout,$rndkey0
  2044. le?vperm $tmp,$output,$output,$leperm
  2045. be?nop
  2046. le?stvx_u $tmp,0,$out
  2047. be?stvx_u $output,0,$out
  2048. addi $out,$out,16
  2049. subic. $len,$len,16
  2050. beq Lxts_dec_done
  2051. vmr $inout,$inptail
  2052. lvx $inptail,0,$inp
  2053. addi $inp,$inp,16
  2054. lvx $rndkey0,0,$key1
  2055. lvx $rndkey1,$idx,$key1
  2056. addi $idx,$idx,16
  2057. vsrab $tmp,$tweak,$seven # next tweak value
  2058. vaddubm $tweak,$tweak,$tweak
  2059. vsldoi $tmp,$tmp,$tmp,15
  2060. vand $tmp,$tmp,$eighty7
  2061. vxor $tweak,$tweak,$tmp
  2062. vperm $inout,$inout,$inptail,$inpperm
  2063. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2064. vxor $inout,$inout,$tweak
  2065. vxor $inout,$inout,$rndkey0
  2066. lvx $rndkey0,$idx,$key1
  2067. addi $idx,$idx,16
  2068. mtctr $rounds
  2069. ${UCMP}i $len,16
  2070. bge Loop_xts_dec
  2071. Ltail_xts_dec:
  2072. vsrab $tmp,$tweak,$seven # next tweak value
  2073. vaddubm $tweak1,$tweak,$tweak
  2074. vsldoi $tmp,$tmp,$tmp,15
  2075. vand $tmp,$tmp,$eighty7
  2076. vxor $tweak1,$tweak1,$tmp
  2077. subi $inp,$inp,16
  2078. add $inp,$inp,$len
  2079. vxor $inout,$inout,$tweak # :-(
  2080. vxor $inout,$inout,$tweak1 # :-)
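# Decrypt-side stealing swaps tweaks: the last complete block has to be
# decrypted with the tweak that follows it ($tweak1), while the original
# $tweak is kept for the stolen partial block handled after
# Loop_xts_dec_short; the two vxor's above replace the tweak that was
# already folded into $inout.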
  2081. Loop_xts_dec_short:
  2082. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2083. vncipher $inout,$inout,$rndkey1
  2084. lvx $rndkey1,$idx,$key1
  2085. addi $idx,$idx,16
  2086. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2087. vncipher $inout,$inout,$rndkey0
  2088. lvx $rndkey0,$idx,$key1
  2089. addi $idx,$idx,16
  2090. bdnz Loop_xts_dec_short
  2091. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2092. vncipher $inout,$inout,$rndkey1
  2093. lvx $rndkey1,$idx,$key1
  2094. li $idx,16
  2095. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2096. vxor $rndkey0,$rndkey0,$tweak1
  2097. vncipherlast $output,$inout,$rndkey0
  2098. le?vperm $tmp,$output,$output,$leperm
  2099. be?nop
  2100. le?stvx_u $tmp,0,$out
  2101. be?stvx_u $output,0,$out
  2102. vmr $inout,$inptail
  2103. lvx $inptail,0,$inp
  2104. #addi $inp,$inp,16
  2105. lvx $rndkey0,0,$key1
  2106. lvx $rndkey1,$idx,$key1
  2107. addi $idx,$idx,16
  2108. vperm $inout,$inout,$inptail,$inpperm
  2109. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2110. lvsr $inpperm,0,$len # $inpperm is no longer needed
  2111. vxor $inptail,$inptail,$inptail # $inptail is no longer needed
  2112. vspltisb $tmp,-1
  2113. vperm $inptail,$inptail,$tmp,$inpperm
  2114. vsel $inout,$inout,$output,$inptail
  2115. vxor $rndkey0,$rndkey0,$tweak
  2116. vxor $inout,$inout,$rndkey0
  2117. lvx $rndkey0,$idx,$key1
  2118. addi $idx,$idx,16
  2119. subi r11,$out,1
  2120. mtctr $len
  2121. li $len,16
  2122. Loop_xts_dec_steal:
  2123. lbzu r0,1(r11)
  2124. stb r0,16(r11)
  2125. bdnz Loop_xts_dec_steal
  2126. mtctr $rounds
  2127. b Loop_xts_dec # one more time...
  2128. Lxts_dec_done:
  2129. ${UCMP}i $ivp,0
  2130. beq Lxts_dec_ret
  2131. vsrab $tmp,$tweak,$seven # next tweak value
  2132. vaddubm $tweak,$tweak,$tweak
  2133. vsldoi $tmp,$tmp,$tmp,15
  2134. vand $tmp,$tmp,$eighty7
  2135. vxor $tweak,$tweak,$tmp
  2136. le?vperm $tweak,$tweak,$tweak,$leperm
  2137. stvx_u $tweak,0,$ivp
  2138. Lxts_dec_ret:
  2139. mtspr 256,r12 # restore vrsave
  2140. li r3,0
  2141. blr
  2142. .long 0
  2143. .byte 0,12,0x04,0,0x80,6,6,0
  2144. .long 0
  2145. .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
  2146. ___
  2147. #########################################################################
  2148. {{ # Optimized XTS procedures #
  2149. my $key_=$key2;
  2150. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
  2151. $x00=0 if ($flavour =~ /osx/);
  2152. my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5));
  2153. my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
  2154. my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2155. my $rndkey0="v23"; # v24-v25 rotating buffer for the first round keys
  2156. # v26-v31 last 6 round keys
  2157. my ($keyperm)=($out0); # aliases with "caller", redundant assignment
  2158. my $taillen=$x70;
  2159. $code.=<<___;
  2160. .align 5
  2161. _aesp8_xts_encrypt6x:
  2162. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2163. mflr r11
  2164. li r7,`$FRAME+8*16+15`
  2165. li r3,`$FRAME+8*16+31`
  2166. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2167. stvx v20,r7,$sp # ABI says so
  2168. addi r7,r7,32
  2169. stvx v21,r3,$sp
  2170. addi r3,r3,32
  2171. stvx v22,r7,$sp
  2172. addi r7,r7,32
  2173. stvx v23,r3,$sp
  2174. addi r3,r3,32
  2175. stvx v24,r7,$sp
  2176. addi r7,r7,32
  2177. stvx v25,r3,$sp
  2178. addi r3,r3,32
  2179. stvx v26,r7,$sp
  2180. addi r7,r7,32
  2181. stvx v27,r3,$sp
  2182. addi r3,r3,32
  2183. stvx v28,r7,$sp
  2184. addi r7,r7,32
  2185. stvx v29,r3,$sp
  2186. addi r3,r3,32
  2187. stvx v30,r7,$sp
  2188. stvx v31,r3,$sp
  2189. li r0,-1
  2190. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2191. li $x10,0x10
  2192. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2193. li $x20,0x20
  2194. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2195. li $x30,0x30
  2196. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2197. li $x40,0x40
  2198. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2199. li $x50,0x50
  2200. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2201. li $x60,0x60
  2202. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2203. li $x70,0x70
  2204. mtspr 256,r0
  2205. # Reverse eighty7 to 0x010101..87
  2206. xxlor 2, 32+$eighty7, 32+$eighty7
  2207. vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
  2208. xxlor 1, 32+$eighty7, 32+$eighty7
2209. # Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
  2210. mr $x70, r6
  2211. bl Lconsts
  2212. lxvw4x 0, $x40, r6 # load XOR contents
  2213. mr r6, $x70
  2214. li $x70,0x70
  2215. subi $rounds,$rounds,3 # -4 in total
  2216. lvx $rndkey0,$x00,$key1 # load key schedule
  2217. lvx v30,$x10,$key1
  2218. addi $key1,$key1,0x20
  2219. lvx v31,$x00,$key1
  2220. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2221. addi $key_,$sp,$FRAME+15
  2222. mtctr $rounds
  2223. Load_xts_enc_key:
  2224. ?vperm v24,v30,v31,$keyperm
  2225. lvx v30,$x10,$key1
  2226. addi $key1,$key1,0x20
  2227. stvx v24,$x00,$key_ # off-load round[1]
  2228. ?vperm v25,v31,v30,$keyperm
  2229. lvx v31,$x00,$key1
  2230. stvx v25,$x10,$key_ # off-load round[2]
  2231. addi $key_,$key_,0x20
  2232. bdnz Load_xts_enc_key
  2233. lvx v26,$x10,$key1
  2234. ?vperm v24,v30,v31,$keyperm
  2235. lvx v27,$x20,$key1
  2236. stvx v24,$x00,$key_ # off-load round[3]
  2237. ?vperm v25,v31,v26,$keyperm
  2238. lvx v28,$x30,$key1
  2239. stvx v25,$x10,$key_ # off-load round[4]
  2240. addi $key_,$sp,$FRAME+15 # rewind $key_
  2241. ?vperm v26,v26,v27,$keyperm
  2242. lvx v29,$x40,$key1
  2243. ?vperm v27,v27,v28,$keyperm
  2244. lvx v30,$x50,$key1
  2245. ?vperm v28,v28,v29,$keyperm
  2246. lvx v31,$x60,$key1
  2247. ?vperm v29,v29,v30,$keyperm
  2248. lvx $twk5,$x70,$key1 # borrow $twk5
  2249. ?vperm v30,v30,v31,$keyperm
  2250. lvx v24,$x00,$key_ # pre-load round[1]
  2251. ?vperm v31,v31,$twk5,$keyperm
  2252. lvx v25,$x10,$key_ # pre-load round[2]
2253. # Switch to the following sequence, which uses 0x010101..87 to generate the tweak.
  2254. # eighty7 = 0x010101..87
  2255. # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
  2256. # vand tmp, tmp, eighty7 # last byte with carry
  2257. # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
  2258. # xxlor vsx, 0, 0
  2259. # vpermxor tweak, tweak, tmp, vsx
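# With that permute constant (loaded into vsr0 above and copied into a spare
# VR before each use), a single vpermxor both rotates the carry mask by one
# byte and xors it into the doubled tweak, replacing the vsldoi+vxor pair of
# the generic path.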
  2260. vperm $in0,$inout,$inptail,$inpperm
  2261. subi $inp,$inp,31 # undo "caller"
  2262. vxor $twk0,$tweak,$rndkey0
  2263. vsrab $tmp,$tweak,$seven # next tweak value
  2264. vaddubm $tweak,$tweak,$tweak
  2265. vand $tmp,$tmp,$eighty7
  2266. vxor $out0,$in0,$twk0
  2267. xxlor 32+$in1, 0, 0
  2268. vpermxor $tweak, $tweak, $tmp, $in1
  2269. lvx_u $in1,$x10,$inp
  2270. vxor $twk1,$tweak,$rndkey0
  2271. vsrab $tmp,$tweak,$seven # next tweak value
  2272. vaddubm $tweak,$tweak,$tweak
  2273. le?vperm $in1,$in1,$in1,$leperm
  2274. vand $tmp,$tmp,$eighty7
  2275. vxor $out1,$in1,$twk1
  2276. xxlor 32+$in2, 0, 0
  2277. vpermxor $tweak, $tweak, $tmp, $in2
  2278. lvx_u $in2,$x20,$inp
  2279. andi. $taillen,$len,15
  2280. vxor $twk2,$tweak,$rndkey0
  2281. vsrab $tmp,$tweak,$seven # next tweak value
  2282. vaddubm $tweak,$tweak,$tweak
  2283. le?vperm $in2,$in2,$in2,$leperm
  2284. vand $tmp,$tmp,$eighty7
  2285. vxor $out2,$in2,$twk2
  2286. xxlor 32+$in3, 0, 0
  2287. vpermxor $tweak, $tweak, $tmp, $in3
  2288. lvx_u $in3,$x30,$inp
  2289. sub $len,$len,$taillen
  2290. vxor $twk3,$tweak,$rndkey0
  2291. vsrab $tmp,$tweak,$seven # next tweak value
  2292. vaddubm $tweak,$tweak,$tweak
  2293. le?vperm $in3,$in3,$in3,$leperm
  2294. vand $tmp,$tmp,$eighty7
  2295. vxor $out3,$in3,$twk3
  2296. xxlor 32+$in4, 0, 0
  2297. vpermxor $tweak, $tweak, $tmp, $in4
  2298. lvx_u $in4,$x40,$inp
  2299. subi $len,$len,0x60
  2300. vxor $twk4,$tweak,$rndkey0
  2301. vsrab $tmp,$tweak,$seven # next tweak value
  2302. vaddubm $tweak,$tweak,$tweak
  2303. le?vperm $in4,$in4,$in4,$leperm
  2304. vand $tmp,$tmp,$eighty7
  2305. vxor $out4,$in4,$twk4
  2306. xxlor 32+$in5, 0, 0
  2307. vpermxor $tweak, $tweak, $tmp, $in5
  2308. lvx_u $in5,$x50,$inp
  2309. addi $inp,$inp,0x60
  2310. vxor $twk5,$tweak,$rndkey0
  2311. vsrab $tmp,$tweak,$seven # next tweak value
  2312. vaddubm $tweak,$tweak,$tweak
  2313. le?vperm $in5,$in5,$in5,$leperm
  2314. vand $tmp,$tmp,$eighty7
  2315. vxor $out5,$in5,$twk5
  2316. xxlor 32+$in0, 0, 0
  2317. vpermxor $tweak, $tweak, $tmp, $in0
  2318. vxor v31,v31,$rndkey0
  2319. mtctr $rounds
  2320. b Loop_xts_enc6x
  2321. .align 5
  2322. Loop_xts_enc6x:
  2323. vcipher $out0,$out0,v24
  2324. vcipher $out1,$out1,v24
  2325. vcipher $out2,$out2,v24
  2326. vcipher $out3,$out3,v24
  2327. vcipher $out4,$out4,v24
  2328. vcipher $out5,$out5,v24
  2329. lvx v24,$x20,$key_ # round[3]
  2330. addi $key_,$key_,0x20
  2331. vcipher $out0,$out0,v25
  2332. vcipher $out1,$out1,v25
  2333. vcipher $out2,$out2,v25
  2334. vcipher $out3,$out3,v25
  2335. vcipher $out4,$out4,v25
  2336. vcipher $out5,$out5,v25
  2337. lvx v25,$x10,$key_ # round[4]
  2338. bdnz Loop_xts_enc6x
  2339. xxlor 32+$eighty7, 1, 1 # 0x010101..87
  2340. subic $len,$len,96 # $len-=96
  2341. vxor $in0,$twk0,v31 # xor with last round key
  2342. vcipher $out0,$out0,v24
  2343. vcipher $out1,$out1,v24
  2344. vsrab $tmp,$tweak,$seven # next tweak value
  2345. vxor $twk0,$tweak,$rndkey0
  2346. vaddubm $tweak,$tweak,$tweak
  2347. vcipher $out2,$out2,v24
  2348. vcipher $out3,$out3,v24
  2349. vcipher $out4,$out4,v24
  2350. vcipher $out5,$out5,v24
  2351. subfe. r0,r0,r0 # borrow?-1:0
  2352. vand $tmp,$tmp,$eighty7
  2353. vcipher $out0,$out0,v25
  2354. vcipher $out1,$out1,v25
  2355. xxlor 32+$in1, 0, 0
  2356. vpermxor $tweak, $tweak, $tmp, $in1
  2357. vcipher $out2,$out2,v25
  2358. vcipher $out3,$out3,v25
  2359. vxor $in1,$twk1,v31
  2360. vsrab $tmp,$tweak,$seven # next tweak value
  2361. vxor $twk1,$tweak,$rndkey0
  2362. vcipher $out4,$out4,v25
  2363. vcipher $out5,$out5,v25
  2364. and r0,r0,$len
  2365. vaddubm $tweak,$tweak,$tweak
  2366. vcipher $out0,$out0,v26
  2367. vcipher $out1,$out1,v26
  2368. vand $tmp,$tmp,$eighty7
  2369. vcipher $out2,$out2,v26
  2370. vcipher $out3,$out3,v26
  2371. xxlor 32+$in2, 0, 0
  2372. vpermxor $tweak, $tweak, $tmp, $in2
  2373. vcipher $out4,$out4,v26
  2374. vcipher $out5,$out5,v26
  2375. add $inp,$inp,r0 # $inp is adjusted in such
  2376. # way that at exit from the
  2377. # loop inX-in5 are loaded
  2378. # with last "words"
  2379. vxor $in2,$twk2,v31
  2380. vsrab $tmp,$tweak,$seven # next tweak value
  2381. vxor $twk2,$tweak,$rndkey0
  2382. vaddubm $tweak,$tweak,$tweak
  2383. vcipher $out0,$out0,v27
  2384. vcipher $out1,$out1,v27
  2385. vcipher $out2,$out2,v27
  2386. vcipher $out3,$out3,v27
  2387. vand $tmp,$tmp,$eighty7
  2388. vcipher $out4,$out4,v27
  2389. vcipher $out5,$out5,v27
  2390. addi $key_,$sp,$FRAME+15 # rewind $key_
  2391. xxlor 32+$in3, 0, 0
  2392. vpermxor $tweak, $tweak, $tmp, $in3
  2393. vcipher $out0,$out0,v28
  2394. vcipher $out1,$out1,v28
  2395. vxor $in3,$twk3,v31
  2396. vsrab $tmp,$tweak,$seven # next tweak value
  2397. vxor $twk3,$tweak,$rndkey0
  2398. vcipher $out2,$out2,v28
  2399. vcipher $out3,$out3,v28
  2400. vaddubm $tweak,$tweak,$tweak
  2401. vcipher $out4,$out4,v28
  2402. vcipher $out5,$out5,v28
  2403. lvx v24,$x00,$key_ # re-pre-load round[1]
  2404. vand $tmp,$tmp,$eighty7
  2405. vcipher $out0,$out0,v29
  2406. vcipher $out1,$out1,v29
  2407. xxlor 32+$in4, 0, 0
  2408. vpermxor $tweak, $tweak, $tmp, $in4
  2409. vcipher $out2,$out2,v29
  2410. vcipher $out3,$out3,v29
  2411. vxor $in4,$twk4,v31
  2412. vsrab $tmp,$tweak,$seven # next tweak value
  2413. vxor $twk4,$tweak,$rndkey0
  2414. vcipher $out4,$out4,v29
  2415. vcipher $out5,$out5,v29
  2416. lvx v25,$x10,$key_ # re-pre-load round[2]
  2417. vaddubm $tweak,$tweak,$tweak
  2418. vcipher $out0,$out0,v30
  2419. vcipher $out1,$out1,v30
  2420. vand $tmp,$tmp,$eighty7
  2421. vcipher $out2,$out2,v30
  2422. vcipher $out3,$out3,v30
  2423. xxlor 32+$in5, 0, 0
  2424. vpermxor $tweak, $tweak, $tmp, $in5
  2425. vcipher $out4,$out4,v30
  2426. vcipher $out5,$out5,v30
  2427. vxor $in5,$twk5,v31
  2428. vsrab $tmp,$tweak,$seven # next tweak value
  2429. vxor $twk5,$tweak,$rndkey0
  2430. vcipherlast $out0,$out0,$in0
  2431. lvx_u $in0,$x00,$inp # load next input block
  2432. vaddubm $tweak,$tweak,$tweak
  2433. vcipherlast $out1,$out1,$in1
  2434. lvx_u $in1,$x10,$inp
  2435. vcipherlast $out2,$out2,$in2
  2436. le?vperm $in0,$in0,$in0,$leperm
  2437. lvx_u $in2,$x20,$inp
  2438. vand $tmp,$tmp,$eighty7
  2439. vcipherlast $out3,$out3,$in3
  2440. le?vperm $in1,$in1,$in1,$leperm
  2441. lvx_u $in3,$x30,$inp
  2442. vcipherlast $out4,$out4,$in4
  2443. le?vperm $in2,$in2,$in2,$leperm
  2444. lvx_u $in4,$x40,$inp
  2445. xxlor 10, 32+$in0, 32+$in0
  2446. xxlor 32+$in0, 0, 0
  2447. vpermxor $tweak, $tweak, $tmp, $in0
  2448. xxlor 32+$in0, 10, 10
  2449. vcipherlast $tmp,$out5,$in5 # last block might be needed
  2450. # in stealing mode
  2451. le?vperm $in3,$in3,$in3,$leperm
  2452. lvx_u $in5,$x50,$inp
  2453. addi $inp,$inp,0x60
  2454. le?vperm $in4,$in4,$in4,$leperm
  2455. le?vperm $in5,$in5,$in5,$leperm
  2456. le?vperm $out0,$out0,$out0,$leperm
  2457. le?vperm $out1,$out1,$out1,$leperm
  2458. stvx_u $out0,$x00,$out # store output
  2459. vxor $out0,$in0,$twk0
  2460. le?vperm $out2,$out2,$out2,$leperm
  2461. stvx_u $out1,$x10,$out
  2462. vxor $out1,$in1,$twk1
  2463. le?vperm $out3,$out3,$out3,$leperm
  2464. stvx_u $out2,$x20,$out
  2465. vxor $out2,$in2,$twk2
  2466. le?vperm $out4,$out4,$out4,$leperm
  2467. stvx_u $out3,$x30,$out
  2468. vxor $out3,$in3,$twk3
  2469. le?vperm $out5,$tmp,$tmp,$leperm
  2470. stvx_u $out4,$x40,$out
  2471. vxor $out4,$in4,$twk4
  2472. le?stvx_u $out5,$x50,$out
  2473. be?stvx_u $tmp, $x50,$out
  2474. vxor $out5,$in5,$twk5
  2475. addi $out,$out,0x60
  2476. mtctr $rounds
  2477. beq Loop_xts_enc6x # did $len-=96 borrow?
  2478. xxlor 32+$eighty7, 2, 2 # 0x870101..01
  2479. addic. $len,$len,0x60
  2480. beq Lxts_enc6x_zero
  2481. cmpwi $len,0x20
  2482. blt Lxts_enc6x_one
  2483. nop
  2484. beq Lxts_enc6x_two
  2485. cmpwi $len,0x40
  2486. blt Lxts_enc6x_three
  2487. nop
  2488. beq Lxts_enc6x_four
  2489. Lxts_enc6x_five:
  2490. vxor $out0,$in1,$twk0
  2491. vxor $out1,$in2,$twk1
  2492. vxor $out2,$in3,$twk2
  2493. vxor $out3,$in4,$twk3
  2494. vxor $out4,$in5,$twk4
  2495. bl _aesp8_xts_enc5x
  2496. le?vperm $out0,$out0,$out0,$leperm
  2497. vmr $twk0,$twk5 # unused tweak
  2498. le?vperm $out1,$out1,$out1,$leperm
  2499. stvx_u $out0,$x00,$out # store output
  2500. le?vperm $out2,$out2,$out2,$leperm
  2501. stvx_u $out1,$x10,$out
  2502. le?vperm $out3,$out3,$out3,$leperm
  2503. stvx_u $out2,$x20,$out
  2504. vxor $tmp,$out4,$twk5 # last block prep for stealing
  2505. le?vperm $out4,$out4,$out4,$leperm
  2506. stvx_u $out3,$x30,$out
  2507. stvx_u $out4,$x40,$out
  2508. addi $out,$out,0x50
  2509. bne Lxts_enc6x_steal
  2510. b Lxts_enc6x_done
  2511. .align 4
  2512. Lxts_enc6x_four:
  2513. vxor $out0,$in2,$twk0
  2514. vxor $out1,$in3,$twk1
  2515. vxor $out2,$in4,$twk2
  2516. vxor $out3,$in5,$twk3
  2517. vxor $out4,$out4,$out4
  2518. bl _aesp8_xts_enc5x
  2519. le?vperm $out0,$out0,$out0,$leperm
  2520. vmr $twk0,$twk4 # unused tweak
  2521. le?vperm $out1,$out1,$out1,$leperm
  2522. stvx_u $out0,$x00,$out # store output
  2523. le?vperm $out2,$out2,$out2,$leperm
  2524. stvx_u $out1,$x10,$out
  2525. vxor $tmp,$out3,$twk4 # last block prep for stealing
  2526. le?vperm $out3,$out3,$out3,$leperm
  2527. stvx_u $out2,$x20,$out
  2528. stvx_u $out3,$x30,$out
  2529. addi $out,$out,0x40
  2530. bne Lxts_enc6x_steal
  2531. b Lxts_enc6x_done
  2532. .align 4
  2533. Lxts_enc6x_three:
  2534. vxor $out0,$in3,$twk0
  2535. vxor $out1,$in4,$twk1
  2536. vxor $out2,$in5,$twk2
  2537. vxor $out3,$out3,$out3
  2538. vxor $out4,$out4,$out4
  2539. bl _aesp8_xts_enc5x
  2540. le?vperm $out0,$out0,$out0,$leperm
  2541. vmr $twk0,$twk3 # unused tweak
  2542. le?vperm $out1,$out1,$out1,$leperm
  2543. stvx_u $out0,$x00,$out # store output
  2544. vxor $tmp,$out2,$twk3 # last block prep for stealing
  2545. le?vperm $out2,$out2,$out2,$leperm
  2546. stvx_u $out1,$x10,$out
  2547. stvx_u $out2,$x20,$out
  2548. addi $out,$out,0x30
  2549. bne Lxts_enc6x_steal
  2550. b Lxts_enc6x_done
  2551. .align 4
  2552. Lxts_enc6x_two:
  2553. vxor $out0,$in4,$twk0
  2554. vxor $out1,$in5,$twk1
  2555. vxor $out2,$out2,$out2
  2556. vxor $out3,$out3,$out3
  2557. vxor $out4,$out4,$out4
  2558. bl _aesp8_xts_enc5x
  2559. le?vperm $out0,$out0,$out0,$leperm
  2560. vmr $twk0,$twk2 # unused tweak
  2561. vxor $tmp,$out1,$twk2 # last block prep for stealing
  2562. le?vperm $out1,$out1,$out1,$leperm
  2563. stvx_u $out0,$x00,$out # store output
  2564. stvx_u $out1,$x10,$out
  2565. addi $out,$out,0x20
  2566. bne Lxts_enc6x_steal
  2567. b Lxts_enc6x_done
  2568. .align 4
  2569. Lxts_enc6x_one:
  2570. vxor $out0,$in5,$twk0
  2571. nop
  2572. Loop_xts_enc1x:
  2573. vcipher $out0,$out0,v24
  2574. lvx v24,$x20,$key_ # round[3]
  2575. addi $key_,$key_,0x20
  2576. vcipher $out0,$out0,v25
  2577. lvx v25,$x10,$key_ # round[4]
  2578. bdnz Loop_xts_enc1x
  2579. add $inp,$inp,$taillen
  2580. cmpwi $taillen,0
  2581. vcipher $out0,$out0,v24
  2582. subi $inp,$inp,16
  2583. vcipher $out0,$out0,v25
  2584. lvsr $inpperm,0,$taillen
  2585. vcipher $out0,$out0,v26
  2586. lvx_u $in0,0,$inp
  2587. vcipher $out0,$out0,v27
  2588. addi $key_,$sp,$FRAME+15 # rewind $key_
  2589. vcipher $out0,$out0,v28
  2590. lvx v24,$x00,$key_ # re-pre-load round[1]
  2591. vcipher $out0,$out0,v29
  2592. lvx v25,$x10,$key_ # re-pre-load round[2]
  2593. vxor $twk0,$twk0,v31
  2594. le?vperm $in0,$in0,$in0,$leperm
  2595. vcipher $out0,$out0,v30
  2596. vperm $in0,$in0,$in0,$inpperm
  2597. vcipherlast $out0,$out0,$twk0
  2598. vmr $twk0,$twk1 # unused tweak
  2599. vxor $tmp,$out0,$twk1 # last block prep for stealing
  2600. le?vperm $out0,$out0,$out0,$leperm
  2601. stvx_u $out0,$x00,$out # store output
  2602. addi $out,$out,0x10
  2603. bne Lxts_enc6x_steal
  2604. b Lxts_enc6x_done
  2605. .align 4
  2606. Lxts_enc6x_zero:
  2607. cmpwi $taillen,0
  2608. beq Lxts_enc6x_done
  2609. add $inp,$inp,$taillen
  2610. subi $inp,$inp,16
  2611. lvx_u $in0,0,$inp
  2612. lvsr $inpperm,0,$taillen # $in5 is no more
  2613. le?vperm $in0,$in0,$in0,$leperm
  2614. vperm $in0,$in0,$in0,$inpperm
  2615. vxor $tmp,$tmp,$twk0
  2616. Lxts_enc6x_steal:
  2617. vxor $in0,$in0,$twk0
  2618. vxor $out0,$out0,$out0
  2619. vspltisb $out1,-1
  2620. vperm $out0,$out0,$out1,$inpperm
  2621. vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
  2622. subi r30,$out,17
  2623. subi $out,$out,16
  2624. mtctr $taillen
  2625. Loop_xts_enc6x_steal:
  2626. lbzu r0,1(r30)
  2627. stb r0,16(r30)
  2628. bdnz Loop_xts_enc6x_steal
  2629. li $taillen,0
  2630. mtctr $rounds
  2631. b Loop_xts_enc1x # one more time...
  2632. .align 4
  2633. Lxts_enc6x_done:
  2634. ${UCMP}i $ivp,0
  2635. beq Lxts_enc6x_ret
  2636. vxor $tweak,$twk0,$rndkey0
  2637. le?vperm $tweak,$tweak,$tweak,$leperm
  2638. stvx_u $tweak,0,$ivp
  2639. Lxts_enc6x_ret:
  2640. mtlr r11
  2641. li r10,`$FRAME+15`
  2642. li r11,`$FRAME+31`
  2643. stvx $seven,r10,$sp # wipe copies of round keys
  2644. addi r10,r10,32
  2645. stvx $seven,r11,$sp
  2646. addi r11,r11,32
  2647. stvx $seven,r10,$sp
  2648. addi r10,r10,32
  2649. stvx $seven,r11,$sp
  2650. addi r11,r11,32
  2651. stvx $seven,r10,$sp
  2652. addi r10,r10,32
  2653. stvx $seven,r11,$sp
  2654. addi r11,r11,32
  2655. stvx $seven,r10,$sp
  2656. addi r10,r10,32
  2657. stvx $seven,r11,$sp
  2658. addi r11,r11,32
  2659. mtspr 256,$vrsave
  2660. lvx v20,r10,$sp # ABI says so
  2661. addi r10,r10,32
  2662. lvx v21,r11,$sp
  2663. addi r11,r11,32
  2664. lvx v22,r10,$sp
  2665. addi r10,r10,32
  2666. lvx v23,r11,$sp
  2667. addi r11,r11,32
  2668. lvx v24,r10,$sp
  2669. addi r10,r10,32
  2670. lvx v25,r11,$sp
  2671. addi r11,r11,32
  2672. lvx v26,r10,$sp
  2673. addi r10,r10,32
  2674. lvx v27,r11,$sp
  2675. addi r11,r11,32
  2676. lvx v28,r10,$sp
  2677. addi r10,r10,32
  2678. lvx v29,r11,$sp
  2679. addi r11,r11,32
  2680. lvx v30,r10,$sp
  2681. lvx v31,r11,$sp
  2682. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2683. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2684. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2685. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2686. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2687. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2688. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  2689. blr
  2690. .long 0
  2691. .byte 0,12,0x04,1,0x80,6,6,0
  2692. .long 0
  2693. .align 5
  2694. _aesp8_xts_enc5x:
  2695. vcipher $out0,$out0,v24
  2696. vcipher $out1,$out1,v24
  2697. vcipher $out2,$out2,v24
  2698. vcipher $out3,$out3,v24
  2699. vcipher $out4,$out4,v24
  2700. lvx v24,$x20,$key_ # round[3]
  2701. addi $key_,$key_,0x20
  2702. vcipher $out0,$out0,v25
  2703. vcipher $out1,$out1,v25
  2704. vcipher $out2,$out2,v25
  2705. vcipher $out3,$out3,v25
  2706. vcipher $out4,$out4,v25
  2707. lvx v25,$x10,$key_ # round[4]
  2708. bdnz _aesp8_xts_enc5x
  2709. add $inp,$inp,$taillen
  2710. cmpwi $taillen,0
  2711. vcipher $out0,$out0,v24
  2712. vcipher $out1,$out1,v24
  2713. vcipher $out2,$out2,v24
  2714. vcipher $out3,$out3,v24
  2715. vcipher $out4,$out4,v24
  2716. subi $inp,$inp,16
  2717. vcipher $out0,$out0,v25
  2718. vcipher $out1,$out1,v25
  2719. vcipher $out2,$out2,v25
  2720. vcipher $out3,$out3,v25
  2721. vcipher $out4,$out4,v25
  2722. vxor $twk0,$twk0,v31
  2723. vcipher $out0,$out0,v26
  2724. lvsr $inpperm,0,$taillen # $in5 is no more
  2725. vcipher $out1,$out1,v26
  2726. vcipher $out2,$out2,v26
  2727. vcipher $out3,$out3,v26
  2728. vcipher $out4,$out4,v26
  2729. vxor $in1,$twk1,v31
  2730. vcipher $out0,$out0,v27
  2731. lvx_u $in0,0,$inp
  2732. vcipher $out1,$out1,v27
  2733. vcipher $out2,$out2,v27
  2734. vcipher $out3,$out3,v27
  2735. vcipher $out4,$out4,v27
  2736. vxor $in2,$twk2,v31
  2737. addi $key_,$sp,$FRAME+15 # rewind $key_
  2738. vcipher $out0,$out0,v28
  2739. vcipher $out1,$out1,v28
  2740. vcipher $out2,$out2,v28
  2741. vcipher $out3,$out3,v28
  2742. vcipher $out4,$out4,v28
  2743. lvx v24,$x00,$key_ # re-pre-load round[1]
  2744. vxor $in3,$twk3,v31
  2745. vcipher $out0,$out0,v29
  2746. le?vperm $in0,$in0,$in0,$leperm
  2747. vcipher $out1,$out1,v29
  2748. vcipher $out2,$out2,v29
  2749. vcipher $out3,$out3,v29
  2750. vcipher $out4,$out4,v29
  2751. lvx v25,$x10,$key_ # re-pre-load round[2]
  2752. vxor $in4,$twk4,v31
  2753. vcipher $out0,$out0,v30
  2754. vperm $in0,$in0,$in0,$inpperm
  2755. vcipher $out1,$out1,v30
  2756. vcipher $out2,$out2,v30
  2757. vcipher $out3,$out3,v30
  2758. vcipher $out4,$out4,v30
  2759. vcipherlast $out0,$out0,$twk0
  2760. vcipherlast $out1,$out1,$in1
  2761. vcipherlast $out2,$out2,$in2
  2762. vcipherlast $out3,$out3,$in3
  2763. vcipherlast $out4,$out4,$in4
  2764. blr
  2765. .long 0
  2766. .byte 0,12,0x14,0,0,0,0,0
  2767. .align 5
  2768. _aesp8_xts_decrypt6x:
  2769. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2770. mflr r11
  2771. li r7,`$FRAME+8*16+15`
  2772. li r3,`$FRAME+8*16+31`
  2773. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2774. stvx v20,r7,$sp # ABI says so
  2775. addi r7,r7,32
  2776. stvx v21,r3,$sp
  2777. addi r3,r3,32
  2778. stvx v22,r7,$sp
  2779. addi r7,r7,32
  2780. stvx v23,r3,$sp
  2781. addi r3,r3,32
  2782. stvx v24,r7,$sp
  2783. addi r7,r7,32
  2784. stvx v25,r3,$sp
  2785. addi r3,r3,32
  2786. stvx v26,r7,$sp
  2787. addi r7,r7,32
  2788. stvx v27,r3,$sp
  2789. addi r3,r3,32
  2790. stvx v28,r7,$sp
  2791. addi r7,r7,32
  2792. stvx v29,r3,$sp
  2793. addi r3,r3,32
  2794. stvx v30,r7,$sp
  2795. stvx v31,r3,$sp
  2796. li r0,-1
  2797. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2798. li $x10,0x10
  2799. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2800. li $x20,0x20
  2801. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2802. li $x30,0x30
  2803. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2804. li $x40,0x40
  2805. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2806. li $x50,0x50
  2807. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2808. li $x60,0x60
  2809. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2810. li $x70,0x70
  2811. mtspr 256,r0
  2812. # Reverse eighty7 to 0x010101..87
  2813. xxlor 2, 32+$eighty7, 32+$eighty7
  2814. vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
  2815. xxlor 1, 32+$eighty7, 32+$eighty7
2816. # Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
  2817. mr $x70, r6
  2818. bl Lconsts
  2819. lxvw4x 0, $x40, r6 # load XOR contents
  2820. mr r6, $x70
  2821. li $x70,0x70
  2822. subi $rounds,$rounds,3 # -4 in total
  2823. lvx $rndkey0,$x00,$key1 # load key schedule
  2824. lvx v30,$x10,$key1
  2825. addi $key1,$key1,0x20
  2826. lvx v31,$x00,$key1
  2827. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2828. addi $key_,$sp,$FRAME+15
  2829. mtctr $rounds
  2830. Load_xts_dec_key:
  2831. ?vperm v24,v30,v31,$keyperm
  2832. lvx v30,$x10,$key1
  2833. addi $key1,$key1,0x20
  2834. stvx v24,$x00,$key_ # off-load round[1]
  2835. ?vperm v25,v31,v30,$keyperm
  2836. lvx v31,$x00,$key1
  2837. stvx v25,$x10,$key_ # off-load round[2]
  2838. addi $key_,$key_,0x20
  2839. bdnz Load_xts_dec_key
  2840. lvx v26,$x10,$key1
  2841. ?vperm v24,v30,v31,$keyperm
  2842. lvx v27,$x20,$key1
  2843. stvx v24,$x00,$key_ # off-load round[3]
  2844. ?vperm v25,v31,v26,$keyperm
  2845. lvx v28,$x30,$key1
  2846. stvx v25,$x10,$key_ # off-load round[4]
  2847. addi $key_,$sp,$FRAME+15 # rewind $key_
  2848. ?vperm v26,v26,v27,$keyperm
  2849. lvx v29,$x40,$key1
  2850. ?vperm v27,v27,v28,$keyperm
  2851. lvx v30,$x50,$key1
  2852. ?vperm v28,v28,v29,$keyperm
  2853. lvx v31,$x60,$key1
  2854. ?vperm v29,v29,v30,$keyperm
  2855. lvx $twk5,$x70,$key1 # borrow $twk5
  2856. ?vperm v30,v30,v31,$keyperm
  2857. lvx v24,$x00,$key_ # pre-load round[1]
  2858. ?vperm v31,v31,$twk5,$keyperm
  2859. lvx v25,$x10,$key_ # pre-load round[2]
  2860. vperm $in0,$inout,$inptail,$inpperm
  2861. subi $inp,$inp,31 # undo "caller"
  2862. vxor $twk0,$tweak,$rndkey0
  2863. vsrab $tmp,$tweak,$seven # next tweak value
  2864. vaddubm $tweak,$tweak,$tweak
  2865. vand $tmp,$tmp,$eighty7
  2866. vxor $out0,$in0,$twk0
  2867. xxlor 32+$in1, 0, 0
  2868. vpermxor $tweak, $tweak, $tmp, $in1
  2869. lvx_u $in1,$x10,$inp
  2870. vxor $twk1,$tweak,$rndkey0
  2871. vsrab $tmp,$tweak,$seven # next tweak value
  2872. vaddubm $tweak,$tweak,$tweak
  2873. le?vperm $in1,$in1,$in1,$leperm
  2874. vand $tmp,$tmp,$eighty7
  2875. vxor $out1,$in1,$twk1
  2876. xxlor 32+$in2, 0, 0
  2877. vpermxor $tweak, $tweak, $tmp, $in2
  2878. lvx_u $in2,$x20,$inp
  2879. andi. $taillen,$len,15
  2880. vxor $twk2,$tweak,$rndkey0
  2881. vsrab $tmp,$tweak,$seven # next tweak value
  2882. vaddubm $tweak,$tweak,$tweak
  2883. le?vperm $in2,$in2,$in2,$leperm
  2884. vand $tmp,$tmp,$eighty7
  2885. vxor $out2,$in2,$twk2
  2886. xxlor 32+$in3, 0, 0
  2887. vpermxor $tweak, $tweak, $tmp, $in3
  2888. lvx_u $in3,$x30,$inp
  2889. sub $len,$len,$taillen
  2890. vxor $twk3,$tweak,$rndkey0
  2891. vsrab $tmp,$tweak,$seven # next tweak value
  2892. vaddubm $tweak,$tweak,$tweak
  2893. le?vperm $in3,$in3,$in3,$leperm
  2894. vand $tmp,$tmp,$eighty7
  2895. vxor $out3,$in3,$twk3
  2896. xxlor 32+$in4, 0, 0
  2897. vpermxor $tweak, $tweak, $tmp, $in4
  2898. lvx_u $in4,$x40,$inp
  2899. subi $len,$len,0x60
  2900. vxor $twk4,$tweak,$rndkey0
  2901. vsrab $tmp,$tweak,$seven # next tweak value
  2902. vaddubm $tweak,$tweak,$tweak
  2903. le?vperm $in4,$in4,$in4,$leperm
  2904. vand $tmp,$tmp,$eighty7
  2905. vxor $out4,$in4,$twk4
  2906. xxlor 32+$in5, 0, 0
  2907. vpermxor $tweak, $tweak, $tmp, $in5
  2908. lvx_u $in5,$x50,$inp
  2909. addi $inp,$inp,0x60
  2910. vxor $twk5,$tweak,$rndkey0
  2911. vsrab $tmp,$tweak,$seven # next tweak value
  2912. vaddubm $tweak,$tweak,$tweak
  2913. le?vperm $in5,$in5,$in5,$leperm
  2914. vand $tmp,$tmp,$eighty7
  2915. vxor $out5,$in5,$twk5
  2916. xxlor 32+$in0, 0, 0
  2917. vpermxor $tweak, $tweak, $tmp, $in0
  2918. vxor v31,v31,$rndkey0
  2919. mtctr $rounds
  2920. b Loop_xts_dec6x
.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec6x

	xxlor		32+$eighty7, 1, 1
	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	xxlor		32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	xxlor		32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
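						# (subic/subfe above is the
						# usual carry trick: r0 is
						# all-ones only when $len-=96
						# borrowed, i.e. on the final
						# pass, so the and/add rewind
						# $inp just before the last
						# set of tail loads)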
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	xxlor		32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	xxlor		32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	xxlor		32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0
	vncipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vncipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	xxlor		10, 32+$in0, 32+$in0
	xxlor		32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor		32+$in0, 10, 10
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	xxlor		32+$eighty7, 2, 2
	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four
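	# $len now holds the number of leftover whole-block bytes
	# (0x10-0x50); the compare/branch ladder above picks the matching
	# tail handler.  Each handler pairs the already-loaded input blocks
	# with their tweaks and (except for the single-block case, which has
	# its own Loop_xts_dec1x) finishes them via the shared
	# _aesp8_xts_dec5x routine further down.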
Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
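	# Ciphertext stealing for a ragged tail: the last full ciphertext
	# block is decrypted under the following tweak ($twk1); the first
	# $taillen bytes of that result are copied out as the short final
	# block by the lbzu/stb loop below, the stolen tail bytes of input
	# are spliced in with vperm/vsel, and the spliced block is run back
	# through Loop_xts_dec1x under the preceding tweak.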
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk1,$twk1,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0
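	# _aesp8_xts_dec5x finishes up to five blocks for the tail handlers
	# above: it runs the remaining rounds out of the stack copy of the
	# key schedule, folds each block's tweak into the final-round xor
	# (twkN^v31), and reloads CTR with $rounds before returning.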
.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
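	# ("?vperm" swaps its two source registers, and "?vsldoi"/"?vspltw"
	# mirror their shift/lane arguments, because vector element order is
	# reversed between the big- and little-endian builds)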
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";