#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reason. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.

# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
  # Command line: [flavour] [output]
  # $output is the last argument if it looks like a file (it has an extension)
  # $flavour is the first argument if it doesn't look like a file
  $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

  # Pick the pointer size, the link-register save offset and the
  # store-with-update / load / store mnemonics from the flavour string.
  # A flavour containing neither "64" nor "32" is rejected.
  if ($flavour =~ /64/) {
      $SIZE_T = 8;
      $LRSAVE = 2*$SIZE_T;
      $STU    = "stdu";
      $POP    = "ld";
      $PUSH   = "std";
  } elsif ($flavour =~ /32/) {
      $SIZE_T = 4;
      $LRSAVE = $SIZE_T;
      $STU    = "stwu";
      $POP    = "lwz";
      $PUSH   = "stw";
  } else { die "nonsense $flavour"; }

  # Non-zero (equal to $SIZE_T) when the flavour name ends in "le".
  $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

  # Locate ppc-xlate.pl next to this script or under ../../perlasm/,
  # then route everything printed to STDOUT through it.
  $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  die "can't locate ppc-xlate.pl";

  open STDOUT,"| $^X $xlate $flavour \"$output\""
      or die "can't call $xlate: $!";

  # Stack frame size, in bytes, allocated by the generated entry points.
  $FRAME=32*$SIZE_T;
  # Append one ".long" directive per argument to $code, with the value
  # written twice as zero-padded 8-digit hex (so each table entry takes
  # 8 bytes).  Processing stops at the first undefined argument.
  # The empty prototype is bypassed by the &_data_word(...) call form
  # used in this file.
  sub _data_word()
  { my $i;
      while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
  }
  # Register names substituted into the assembly templates.

  # Stack pointer, TOC pointer and the three argument registers.
  $sp="r1";
  $toc="r2";
  $inp="r3";
  $out="r4";
  $key="r5";

  # Table pointers.  $Tbl0 shares r3 with $inp and $Tbl3 shares r4 with
  # $out, so both arguments are dead once the tables are in use.
  $Tbl0="r3";
  $Tbl1="r6";
  $Tbl2="r7";
  $Tbl3=$out;	# stay away from "r2"; $out is offloaded to stack

  # Cipher state words.
  $s0="r8";
  $s1="r9";
  $s2="r10";
  $s3="r11";

  # Temporaries (round-key words and partial results).
  $t0="r12";
  $t1="r0";	# stay away from "r13";
  $t2="r14";
  $t3="r15";

  # Sixteen accumulators, r16..r31.
  $acc00="r16";
  $acc01="r17";
  $acc02="r18";
  $acc03="r19";
  $acc04="r20";
  $acc05="r21";
  $acc06="r22";
  $acc07="r23";
  $acc08="r24";
  $acc09="r25";
  $acc10="r26";
  $acc11="r27";
  $acc12="r28";
  $acc13="r29";
  $acc14="r30";
  $acc15="r31";

  # Constant masks for the compact code; they reuse the $Tbl2 and $Tbl3
  # registers.
  $mask80=$Tbl2;
  $mask1b=$Tbl3;
  # LAES_Te / LAES_Td: position-independent table locators.  Each saves
  # LR in r0, uses "bcl 20,31,$+4" to obtain the address of the following
  # instruction, adds the distance to the wanted table and returns that
  # address in $Tbl0 with LR restored.
  #
  # Layout, relative to the 128-byte aligned LAES_Te label:
  #   +0    LAES_Te  (9 words of code and tag, padded to 64 bytes)
  #   +64   LAES_Td  (9 words of code and tag, padded to 64 bytes)
  #   +128  first table emitted after this block
  # LAES_Te therefore adds 128-8; LAES_Td additionally skips the 2048-byte
  # word table and the 256-byte byte table that precede its own table.
  $code.=<<___;
.machine	"any"

.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`128-64-9*4`
___
  # Encryption word table: 256 entries, each emitted twice by _data_word
  # (8 bytes per entry, 2048 bytes in total).  This is the table whose
  # address LAES_Te returns.
  &_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  # 256-entry byte table (the AES S-box) placed directly after the
  # 2048-byte word table; the round code addresses it at table base + 2048
  # and refers to it as Te4.
  $code.=<<___;
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
___
  # Decryption word table: 256 entries, each emitted twice by _data_word
  # (8 bytes per entry, 2048 bytes in total).  It follows the encryption
  # word and byte tables, which is the offset LAES_Td adds.
  &_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  # First: the 256-entry byte table that follows the decryption word
  # table (the AES inverse S-box).
  #
  # Then the prologue of the exported .AES_encrypt entry point, which
  # takes $inp (r3), $out (r4) and $key (r5):
  #   - allocates a $FRAME-byte stack frame;
  #   - saves $out, r14..r31 and LR (LR goes to $FRAME+$LRSAVE), because
  #     the round code reuses $out's register as a table/mask register;
  #   - branches to Lenc_unaligned unless both $inp and $out are
  #     4-byte aligned.
  $code.=<<___;
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d


.globl	.AES_encrypt
.align	7
.AES_encrypt:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Lenc_unaligned

Lenc_unaligned_ok:
___
  # Word-access path of .AES_encrypt (entered at Lenc_unaligned_ok).
  #
  # Load the 16-byte block into $s0..$s3.  On big-endian targets the
  # words are used as loaded; on little-endian targets each word is
  # byte-reversed with rotlwi 8 plus two rlwimi 24 inserts (top byte and
  # third byte).
  $code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
___
  $code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$t0,0($inp)
	lwz	$t1,4($inp)
	lwz	$t2,8($inp)
	lwz	$t3,12($inp)
	rotlwi	$s0,$t0,8
	rotlwi	$s1,$t1,8
	rotlwi	$s2,$t2,8
	rotlwi	$s3,$t3,8
	rlwimi	$s0,$t0,24,0,7
	rlwimi	$s1,$t1,24,0,7
	rlwimi	$s2,$t2,24,0,7
	rlwimi	$s3,$t3,24,0,7
	rlwimi	$s0,$t0,24,16,23
	rlwimi	$s1,$t1,24,16,23
	rlwimi	$s2,$t2,24,16,23
	rlwimi	$s3,$t3,24,16,23
___
  # Fetch the table base into $Tbl0 (this overwrites $inp, which shares
  # its register), run the compact cipher, then reload $out from its
  # stack slot.
  $code.=<<___;
	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
___
  # Store the result, byte-reversing each word again on little-endian
  # targets.
  $code.=<<___ if ($LITTLE_ENDIAN);
	rotlwi	$t0,$s0,8
	rotlwi	$t1,$s1,8
	rotlwi	$t2,$s2,8
	rotlwi	$t3,$s3,8
	rlwimi	$t0,$s0,24,0,7
	rlwimi	$t1,$s1,24,0,7
	rlwimi	$t2,$s2,24,0,7
	rlwimi	$t3,$s3,24,0,7
	rlwimi	$t0,$s0,24,16,23
	rlwimi	$t1,$s1,24,16,23
	rlwimi	$t2,$s2,24,16,23
	rlwimi	$t3,$s3,24,16,23
	stw	$t0,0($out)
	stw	$t1,4($out)
	stw	$t2,8($out)
	stw	$t3,12($out)
___
  $code.=<<___ if (!$LITTLE_ENDIAN);
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
___
  # Remainder of .AES_encrypt.  (This heredoc stays open: the routines
  # that follow are part of the same string.)
  #
  # Lenc_unaligned: reached when $inp or $out is not 4-byte aligned.
  #   Bits 4..11 of (4096 - pointer) are tested for each pointer.  If they
  #   are all zero for either one -- the pointer sits in the last 16 bytes
  #   of a 4096-byte page -- control goes to Lenc_xpage; otherwise the
  #   word-access path at Lenc_unaligned_ok is used after all.
  # Lenc_xpage: loads the block one byte at a time, assembling $s0..$s3
  #   most-significant byte first, runs the compact cipher and stores the
  #   result one byte at a time.
  # Lenc_done: restores LR and r14..r31, releases the frame and returns.
  $code.=<<___;
	b	Lenc_done

Lenc_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Lenc_xpage
	andi.	$t1,$t1,4096-16
	bne	Lenc_unaligned_ok

Lenc_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
  # Lppc_AES_encrypt: table-driven block encryption using the word table.
  #   In:  state words in s0..s3, key pointer in key, table base in Tbl0.
  #   Out: encrypted state in s0..s3.
  #   The round count is read from offset 240 of the key structure and
  #   the loop runs (rounds - 1) times; the last round uses the byte
  #   table at Tbl0 + 2048.
  #   Tbl1, Tbl2, Tbl3 are set to Tbl0 + 3, + 2, + 1: every table entry
  #   is stored twice in a row, so a word load at those byte offsets
  #   reads the entry rotated by one, two or three bytes.
  #   Overwrites t0..t3, acc00..acc15, Tbl1..Tbl3, key and CTR.
  #   Loads and ALU operations are interleaved on purpose; keep the order.
.align	5
Lppc_AES_encrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
Lenc_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s1,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s2,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s3,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s0,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz	Lenc_loop

	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
	rlwinm	$acc04,$s1,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s2,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	rlwinm	$acc06,$s3,`32-16`,24,31
	lwz	$acc11,`2048+96`($Tbl0)
	rlwinm	$acc07,$s0,`32-16`,24,31
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc10,$s0,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc00,$Tbl2,$acc00
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc01,$Tbl2,$acc01
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc08,$Tbl2,$acc08
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc09,$Tbl2,$acc09
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc10,$Tbl2,$acc10
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc11,$Tbl2,$acc11
	rlwimi	$s3,$acc07,16,8,15
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc08,8,16,23
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc09,8,16,23
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc10,8,16,23
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
  641. .align 4
  642. Lppc_AES_encrypt_compact:
  643. lwz $acc00,240($key)
  644. addi $Tbl1,$Tbl0,2048
  645. lwz $t0,0($key)
  646. lis $mask80,0x8080
  647. lwz $t1,4($key)
  648. lis $mask1b,0x1b1b
  649. lwz $t2,8($key)
  650. ori $mask80,$mask80,0x8080
  651. lwz $t3,12($key)
  652. ori $mask1b,$mask1b,0x1b1b
  653. addi $key,$key,16
  654. mtctr $acc00
  655. .align 4
  656. Lenc_compact_loop:
  657. xor $s0,$s0,$t0
  658. xor $s1,$s1,$t1
  659. rlwinm $acc00,$s0,`32-24`,24,31
  660. xor $s2,$s2,$t2
  661. rlwinm $acc01,$s1,`32-24`,24,31
  662. xor $s3,$s3,$t3
  663. rlwinm $acc02,$s2,`32-24`,24,31
  664. rlwinm $acc03,$s3,`32-24`,24,31
  665. rlwinm $acc04,$s1,`32-16`,24,31
  666. rlwinm $acc05,$s2,`32-16`,24,31
  667. rlwinm $acc06,$s3,`32-16`,24,31
  668. rlwinm $acc07,$s0,`32-16`,24,31
  669. lbzx $acc00,$Tbl1,$acc00
  670. rlwinm $acc08,$s2,`32-8`,24,31
  671. lbzx $acc01,$Tbl1,$acc01
  672. rlwinm $acc09,$s3,`32-8`,24,31
  673. lbzx $acc02,$Tbl1,$acc02
  674. rlwinm $acc10,$s0,`32-8`,24,31
  675. lbzx $acc03,$Tbl1,$acc03
  676. rlwinm $acc11,$s1,`32-8`,24,31
  677. lbzx $acc04,$Tbl1,$acc04
  678. rlwinm $acc12,$s3,`0`,24,31
  679. lbzx $acc05,$Tbl1,$acc05
  680. rlwinm $acc13,$s0,`0`,24,31
  681. lbzx $acc06,$Tbl1,$acc06
  682. rlwinm $acc14,$s1,`0`,24,31
  683. lbzx $acc07,$Tbl1,$acc07
  684. rlwinm $acc15,$s2,`0`,24,31
  685. lbzx $acc08,$Tbl1,$acc08
  686. rlwinm $s0,$acc00,24,0,7
  687. lbzx $acc09,$Tbl1,$acc09
  688. rlwinm $s1,$acc01,24,0,7
  689. lbzx $acc10,$Tbl1,$acc10
  690. rlwinm $s2,$acc02,24,0,7
  691. lbzx $acc11,$Tbl1,$acc11
  692. rlwinm $s3,$acc03,24,0,7
  693. lbzx $acc12,$Tbl1,$acc12
  694. rlwimi $s0,$acc04,16,8,15
  695. lbzx $acc13,$Tbl1,$acc13
  696. rlwimi $s1,$acc05,16,8,15
  697. lbzx $acc14,$Tbl1,$acc14
  698. rlwimi $s2,$acc06,16,8,15
  699. lbzx $acc15,$Tbl1,$acc15
  700. rlwimi $s3,$acc07,16,8,15
  701. rlwimi $s0,$acc08,8,16,23
  702. rlwimi $s1,$acc09,8,16,23
  703. rlwimi $s2,$acc10,8,16,23
  704. rlwimi $s3,$acc11,8,16,23
  705. lwz $t0,0($key)
  706. or $s0,$s0,$acc12
  707. lwz $t1,4($key)
  708. or $s1,$s1,$acc13
  709. lwz $t2,8($key)
  710. or $s2,$s2,$acc14
  711. lwz $t3,12($key)
  712. or $s3,$s3,$acc15
  713. addi $key,$key,16
  714. bdz Lenc_compact_done
  715. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  716. and $acc01,$s1,$mask80
  717. and $acc02,$s2,$mask80
  718. and $acc03,$s3,$mask80
  719. srwi $acc04,$acc00,7 # r1>>7
  720. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  721. srwi $acc05,$acc01,7
  722. andc $acc09,$s1,$mask80
  723. srwi $acc06,$acc02,7
  724. andc $acc10,$s2,$mask80
  725. srwi $acc07,$acc03,7
  726. andc $acc11,$s3,$mask80
  727. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  728. sub $acc01,$acc01,$acc05
  729. sub $acc02,$acc02,$acc06
  730. sub $acc03,$acc03,$acc07
  731. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  732. add $acc09,$acc09,$acc09
  733. add $acc10,$acc10,$acc10
  734. add $acc11,$acc11,$acc11
  735. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  736. and $acc01,$acc01,$mask1b
  737. and $acc02,$acc02,$mask1b
  738. and $acc03,$acc03,$mask1b
  739. xor $acc00,$acc00,$acc08 # r2
  740. xor $acc01,$acc01,$acc09
  741. rotlwi $acc12,$s0,16 # ROTATE(r0,16)
  742. xor $acc02,$acc02,$acc10
  743. rotlwi $acc13,$s1,16
  744. xor $acc03,$acc03,$acc11
  745. rotlwi $acc14,$s2,16
  746. xor $s0,$s0,$acc00 # r0^r2
  747. rotlwi $acc15,$s3,16
  748. xor $s1,$s1,$acc01
  749. rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
  750. xor $s2,$s2,$acc02
  751. rotrwi $s1,$s1,24
  752. xor $s3,$s3,$acc03
  753. rotrwi $s2,$s2,24
  754. xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
  755. rotrwi $s3,$s3,24
  756. xor $s1,$s1,$acc01
  757. xor $s2,$s2,$acc02
  758. xor $s3,$s3,$acc03
  759. rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
  760. xor $s0,$s0,$acc12 #
  761. rotlwi $acc09,$acc13,8
  762. xor $s1,$s1,$acc13
  763. rotlwi $acc10,$acc14,8
  764. xor $s2,$s2,$acc14
  765. rotlwi $acc11,$acc15,8
  766. xor $s3,$s3,$acc15
  767. xor $s0,$s0,$acc08 #
  768. xor $s1,$s1,$acc09
  769. xor $s2,$s2,$acc10
  770. xor $s3,$s3,$acc11
  771. b Lenc_compact_loop
  772. .align 4
  773. Lenc_compact_done:
  774. xor $s0,$s0,$t0
  775. xor $s1,$s1,$t1
  776. xor $s2,$s2,$t2
  777. xor $s3,$s3,$t3
  778. blr
  779. .long 0
  780. .byte 0,12,0x14,0,0,0,0,0
  781. .size .AES_encrypt,.-.AES_encrypt
  # .AES_decrypt -- exported entry point that decrypts one 16-byte block.
  # It reads 16 bytes through the inp pointer into the state words s0-s3,
  # calls LAES_Td and then Lppc_AES_decrypt_compact, and writes the resulting
  # 16 bytes through the out pointer.
  # Three I/O strategies are used:
  #   1. inp and out both 4-byte aligned: word loads and stores;
  #   2. unaligned, but neither block is within 16 bytes of the next
  #      4096-byte boundary: the same word loads and stores;
  #   3. otherwise: byte-by-byte loads and stores (Ldec_xpage).
  782. .globl .AES_decrypt
  783. .align 7
  784. .AES_decrypt:
  # Prologue: allocate the frame, then save out, r14-r31 and the link
  # register (copied into r0 by mflr) into it.
  785. $STU $sp,-$FRAME($sp)
  786. mflr r0
  787. $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
  788. $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
  789. $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
  790. $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
  791. $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
  792. $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
  793. $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
  794. $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
  795. $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
  796. $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
  797. $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
  798. $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
  799. $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
  800. $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
  801. $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
  802. $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
  803. $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
  804. $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
  805. $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
  806. $PUSH r0,`$FRAME+$LRSAVE`($sp)
  # Alignment test: OR together the low two address bits of inp and out and
  # leave the fast path if any of them is set.
  807. andi. $t0,$inp,3
  808. andi. $t1,$out,3
  809. or. $t0,$t0,$t1
  810. bne Ldec_unaligned
  # Word-at-a-time path. Also re-entered from Ldec_unaligned when the
  # boundary check there passes.
  811. Ldec_unaligned_ok:
  812. ___
  # Big-endian build: load the four input words straight into s0-s3.
  813. $code.=<<___ if (!$LITTLE_ENDIAN);
  814. lwz $s0,0($inp)
  815. lwz $s1,4($inp)
  816. lwz $s2,8($inp)
  817. lwz $s3,12($inp)
  818. ___
  # Little-endian build: load into t0-t3, then byte-reverse each word into
  # s0-s3. rotlwi by 8 puts the two middle bytes in place; the two rlwimi
  # instructions then patch the top byte and the third byte from a rotation
  # by 24.
  819. $code.=<<___ if ($LITTLE_ENDIAN);
  820. lwz $t0,0($inp)
  821. lwz $t1,4($inp)
  822. lwz $t2,8($inp)
  823. lwz $t3,12($inp)
  824. rotlwi $s0,$t0,8
  825. rotlwi $s1,$t1,8
  826. rotlwi $s2,$t2,8
  827. rotlwi $s3,$t3,8
  828. rlwimi $s0,$t0,24,0,7
  829. rlwimi $s1,$t1,24,0,7
  830. rlwimi $s2,$t2,24,0,7
  831. rlwimi $s3,$t3,24,0,7
  832. rlwimi $s0,$t0,24,16,23
  833. rlwimi $s1,$t1,24,16,23
  834. rlwimi $s2,$t2,24,16,23
  835. rlwimi $s3,$t3,24,16,23
  836. ___
  837. $code.=<<___;
  # LAES_Td is defined outside this chunk; presumably it loads the address of
  # the decryption tables into Tbl0 -- TODO confirm. The compact routine then
  # transforms s0-s3 in place. Afterwards out is reloaded from the frame slot
  # written in the prologue (NOTE(review): presumably because its register is
  # reused as scratch by the callee -- confirm against the register map).
  838. bl LAES_Td
  839. bl Lppc_AES_decrypt_compact
  840. $POP $out,`$FRAME-$SIZE_T*19`($sp)
  841. ___
  # Little-endian build: byte-reverse s0-s3 into t0-t3 (same three-step
  # sequence as on input) and store the four words.
  842. $code.=<<___ if ($LITTLE_ENDIAN);
  843. rotlwi $t0,$s0,8
  844. rotlwi $t1,$s1,8
  845. rotlwi $t2,$s2,8
  846. rotlwi $t3,$s3,8
  847. rlwimi $t0,$s0,24,0,7
  848. rlwimi $t1,$s1,24,0,7
  849. rlwimi $t2,$s2,24,0,7
  850. rlwimi $t3,$s3,24,0,7
  851. rlwimi $t0,$s0,24,16,23
  852. rlwimi $t1,$s1,24,16,23
  853. rlwimi $t2,$s2,24,16,23
  854. rlwimi $t3,$s3,24,16,23
  855. stw $t0,0($out)
  856. stw $t1,4($out)
  857. stw $t2,8($out)
  858. stw $t3,12($out)
  859. ___
  # Big-endian build: store s0-s3 directly.
  860. $code.=<<___ if (!$LITTLE_ENDIAN);
  861. stw $s0,0($out)
  862. stw $s1,4($out)
  863. stw $s2,8($out)
  864. stw $s3,12($out)
  865. ___
  866. $code.=<<___;
  867. b Ldec_done
  # Unaligned pointers. Compute 4096-inp and 4096-out and keep bits 4-11 of
  # each (mask 4096-16). A zero result means fewer than 16 bytes remain before
  # the next 4096-byte boundary, so a 16-byte block could straddle it: in that
  # case use the byte-wise path. If both results are non-zero, go back to the
  # word-at-a-time path.
  868. Ldec_unaligned:
  869. subfic $t0,$inp,4096
  870. subfic $t1,$out,4096
  871. andi. $t0,$t0,4096-16
  872. beq Ldec_xpage
  873. andi. $t1,$t1,4096-16
  874. bne Ldec_unaligned_ok
  # Byte-wise path. Each group of four input bytes is assembled into one
  # state word with the lowest-addressed byte in the most significant
  # position: the byte at offset 3 is loaded directly into the word and the
  # bytes at offsets 0, 1, 2 are inserted at bit positions 0, 8, 16 by insrwi.
  # No endian-specific variant is needed here.
  875. Ldec_xpage:
  876. lbz $acc00,0($inp)
  877. lbz $acc01,1($inp)
  878. lbz $acc02,2($inp)
  879. lbz $s0,3($inp)
  880. lbz $acc04,4($inp)
  881. lbz $acc05,5($inp)
  882. lbz $acc06,6($inp)
  883. lbz $s1,7($inp)
  884. lbz $acc08,8($inp)
  885. lbz $acc09,9($inp)
  886. lbz $acc10,10($inp)
  887. insrwi $s0,$acc00,8,0
  888. lbz $s2,11($inp)
  889. insrwi $s1,$acc04,8,0
  890. lbz $acc12,12($inp)
  891. insrwi $s0,$acc01,8,8
  892. lbz $acc13,13($inp)
  893. insrwi $s1,$acc05,8,8
  894. lbz $acc14,14($inp)
  895. insrwi $s0,$acc02,8,16
  896. lbz $s3,15($inp)
  897. insrwi $s1,$acc06,8,16
  898. insrwi $s2,$acc08,8,0
  899. insrwi $s3,$acc12,8,0
  900. insrwi $s2,$acc09,8,8
  901. insrwi $s3,$acc13,8,8
  902. insrwi $s2,$acc10,8,16
  903. insrwi $s3,$acc14,8,16
  # Same two calls and the same reload of out as on the word path.
  904. bl LAES_Td
  905. bl Lppc_AES_decrypt_compact
  906. $POP $out,`$FRAME-$SIZE_T*19`($sp)
  # Split each result word back into bytes: extrwi pulls out the three upper
  # bytes, and stb of the word register itself stores its low byte.
  907. extrwi $acc00,$s0,8,0
  908. extrwi $acc01,$s0,8,8
  909. stb $acc00,0($out)
  910. extrwi $acc02,$s0,8,16
  911. stb $acc01,1($out)
  912. stb $acc02,2($out)
  913. extrwi $acc04,$s1,8,0
  914. stb $s0,3($out)
  915. extrwi $acc05,$s1,8,8
  916. stb $acc04,4($out)
  917. extrwi $acc06,$s1,8,16
  918. stb $acc05,5($out)
  919. stb $acc06,6($out)
  920. extrwi $acc08,$s2,8,0
  921. stb $s1,7($out)
  922. extrwi $acc09,$s2,8,8
  923. stb $acc08,8($out)
  924. extrwi $acc10,$s2,8,16
  925. stb $acc09,9($out)
  926. stb $acc10,10($out)
  927. extrwi $acc12,$s3,8,0
  928. stb $s2,11($out)
  929. extrwi $acc13,$s3,8,8
  930. stb $acc12,12($out)
  931. extrwi $acc14,$s3,8,16
  932. stb $acc13,13($out)
  933. stb $acc14,14($out)
  934. stb $s3,15($out)
  # Epilogue shared by all paths: reload the saved link register value and
  # r14-r31, restore the link register, release the frame and return.
  935. Ldec_done:
  936. $POP r0,`$FRAME+$LRSAVE`($sp)
  937. $POP r14,`$FRAME-$SIZE_T*18`($sp)
  938. $POP r15,`$FRAME-$SIZE_T*17`($sp)
  939. $POP r16,`$FRAME-$SIZE_T*16`($sp)
  940. $POP r17,`$FRAME-$SIZE_T*15`($sp)
  941. $POP r18,`$FRAME-$SIZE_T*14`($sp)
  942. $POP r19,`$FRAME-$SIZE_T*13`($sp)
  943. $POP r20,`$FRAME-$SIZE_T*12`($sp)
  944. $POP r21,`$FRAME-$SIZE_T*11`($sp)
  945. $POP r22,`$FRAME-$SIZE_T*10`($sp)
  946. $POP r23,`$FRAME-$SIZE_T*9`($sp)
  947. $POP r24,`$FRAME-$SIZE_T*8`($sp)
  948. $POP r25,`$FRAME-$SIZE_T*7`($sp)
  949. $POP r26,`$FRAME-$SIZE_T*6`($sp)
  950. $POP r27,`$FRAME-$SIZE_T*5`($sp)
  951. $POP r28,`$FRAME-$SIZE_T*4`($sp)
  952. $POP r29,`$FRAME-$SIZE_T*3`($sp)
  953. $POP r30,`$FRAME-$SIZE_T*2`($sp)
  954. $POP r31,`$FRAME-$SIZE_T*1`($sp)
  955. mtlr r0
  956. addi $sp,$sp,$FRAME
  957. blr
  # NOTE(review): the data words below are presumably a traceback table for
  # this function -- confirm the field values against the target ABI.
  958. .long 0
  959. .byte 0,12,4,1,0x80,18,3,0
  960. .long 0
  # Lppc_AES_decrypt -- table-driven decryption of the state held in s0-s3.
  # Internal routine: it returns with blr and sets up no stack frame.
  # Inputs: s0-s3 (state), key (pointer to the round keys, with the round
  # count stored as a word at byte offset 240), Tbl0 (table base).
  # Output: s0-s3 hold the transformed state. t0-t3, acc00-acc15, Tbl1-Tbl3,
  # key and the count register are overwritten.
  # NOTE(review): nothing in the visible part of this file branches here;
  # .AES_decrypt calls Lppc_AES_decrypt_compact instead.
  961. .align 5
  962. Lppc_AES_decrypt:
  # Setup: read the round count, derive Tbl1-Tbl3 as Tbl0+3, Tbl0+2 and
  # Tbl0+1, load the first round key and XOR it into the state. The loop
  # counter is the round count minus one.
  # NOTE(review): the 1/2/3-byte offsets presumably give byte-rotated views of
  # 8-byte table entries -- confirm against the table definition.
  963. lwz $acc00,240($key)
  964. addi $Tbl1,$Tbl0,3
  965. lwz $t0,0($key)
  966. addi $Tbl2,$Tbl0,2
  967. lwz $t1,4($key)
  968. addi $Tbl3,$Tbl0,1
  969. lwz $t2,8($key)
  970. addi $acc00,$acc00,-1
  971. lwz $t3,12($key)
  972. addi $key,$key,16
  973. xor $s0,$s0,$t0
  974. xor $s1,$s1,$t1
  975. xor $s2,$s2,$t2
  976. xor $s3,$s3,$t3
  977. mtctr $acc00
  978. .align 4
  # One full round per iteration. Each rlwinm extracts one state byte already
  # multiplied by 8 (extra rotate of 3, mask bits 21-28) for use as a table
  # offset. The most significant bytes index Tbl0, the next ones Tbl1, then
  # Tbl2, and the least significant bytes Tbl3. The four looked-up words for
  # each column are XORed together with the round key word loaded into t0-t3.
  979. Ldec_loop:
  980. rlwinm $acc00,$s0,`32-24+3`,21,28
  981. rlwinm $acc01,$s1,`32-24+3`,21,28
  982. rlwinm $acc02,$s2,`32-24+3`,21,28
  983. rlwinm $acc03,$s3,`32-24+3`,21,28
  984. lwz $t0,0($key)
  985. rlwinm $acc04,$s3,`32-16+3`,21,28
  986. lwz $t1,4($key)
  987. rlwinm $acc05,$s0,`32-16+3`,21,28
  988. lwz $t2,8($key)
  989. rlwinm $acc06,$s1,`32-16+3`,21,28
  990. lwz $t3,12($key)
  991. rlwinm $acc07,$s2,`32-16+3`,21,28
  992. lwzx $acc00,$Tbl0,$acc00
  993. rlwinm $acc08,$s2,`32-8+3`,21,28
  994. lwzx $acc01,$Tbl0,$acc01
  995. rlwinm $acc09,$s3,`32-8+3`,21,28
  996. lwzx $acc02,$Tbl0,$acc02
  997. rlwinm $acc10,$s0,`32-8+3`,21,28
  998. lwzx $acc03,$Tbl0,$acc03
  999. rlwinm $acc11,$s1,`32-8+3`,21,28
  1000. lwzx $acc04,$Tbl1,$acc04
  1001. rlwinm $acc12,$s1,`0+3`,21,28
  1002. lwzx $acc05,$Tbl1,$acc05
  1003. rlwinm $acc13,$s2,`0+3`,21,28
  1004. lwzx $acc06,$Tbl1,$acc06
  1005. rlwinm $acc14,$s3,`0+3`,21,28
  1006. lwzx $acc07,$Tbl1,$acc07
  1007. rlwinm $acc15,$s0,`0+3`,21,28
  1008. lwzx $acc08,$Tbl2,$acc08
  1009. xor $t0,$t0,$acc00
  1010. lwzx $acc09,$Tbl2,$acc09
  1011. xor $t1,$t1,$acc01
  1012. lwzx $acc10,$Tbl2,$acc10
  1013. xor $t2,$t2,$acc02
  1014. lwzx $acc11,$Tbl2,$acc11
  1015. xor $t3,$t3,$acc03
  1016. lwzx $acc12,$Tbl3,$acc12
  1017. xor $t0,$t0,$acc04
  1018. lwzx $acc13,$Tbl3,$acc13
  1019. xor $t1,$t1,$acc05
  1020. lwzx $acc14,$Tbl3,$acc14
  1021. xor $t2,$t2,$acc06
  1022. lwzx $acc15,$Tbl3,$acc15
  1023. xor $t3,$t3,$acc07
  1024. xor $t0,$t0,$acc08
  1025. xor $t1,$t1,$acc09
  1026. xor $t2,$t2,$acc10
  1027. xor $t3,$t3,$acc11
  1028. xor $s0,$t0,$acc12
  1029. xor $s1,$t1,$acc13
  1030. xor $s2,$t2,$acc14
  1031. xor $s3,$t3,$acc15
  1032. addi $key,$key,16
  1033. bdnz Ldec_loop
  # Last round. Tbl2 is repointed at the byte table located 2048 bytes past
  # Tbl0. The eight lwz loads from Tbl0+2048 at a 32-byte stride only touch
  # that 256-byte table: their destination registers acc08-acc15 are
  # overwritten by the rlwinm instructions below before being used.
  # State bytes are extracted as plain 0-255 indices (mask bits 24-31),
  # substituted with lbzx, and reassembled into words with rlwinm/rlwimi/or.
  1034. addi $Tbl2,$Tbl0,2048
  1035. nop
  1036. lwz $t0,0($key)
  1037. rlwinm $acc00,$s0,`32-24`,24,31
  1038. lwz $t1,4($key)
  1039. rlwinm $acc01,$s1,`32-24`,24,31
  1040. lwz $t2,8($key)
  1041. rlwinm $acc02,$s2,`32-24`,24,31
  1042. lwz $t3,12($key)
  1043. rlwinm $acc03,$s3,`32-24`,24,31
  1044. lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
  1045. rlwinm $acc04,$s3,`32-16`,24,31
  1046. lwz $acc09,`2048+32`($Tbl0)
  1047. rlwinm $acc05,$s0,`32-16`,24,31
  1048. lwz $acc10,`2048+64`($Tbl0)
  1049. lbzx $acc00,$Tbl2,$acc00
  1050. lwz $acc11,`2048+96`($Tbl0)
  1051. lbzx $acc01,$Tbl2,$acc01
  1052. lwz $acc12,`2048+128`($Tbl0)
  1053. rlwinm $acc06,$s1,`32-16`,24,31
  1054. lwz $acc13,`2048+160`($Tbl0)
  1055. rlwinm $acc07,$s2,`32-16`,24,31
  1056. lwz $acc14,`2048+192`($Tbl0)
  1057. rlwinm $acc08,$s2,`32-8`,24,31
  1058. lwz $acc15,`2048+224`($Tbl0)
  1059. rlwinm $acc09,$s3,`32-8`,24,31
  1060. lbzx $acc02,$Tbl2,$acc02
  1061. rlwinm $acc10,$s0,`32-8`,24,31
  1062. lbzx $acc03,$Tbl2,$acc03
  1063. rlwinm $acc11,$s1,`32-8`,24,31
  1064. lbzx $acc04,$Tbl2,$acc04
  1065. rlwinm $acc12,$s1,`0`,24,31
  1066. lbzx $acc05,$Tbl2,$acc05
  1067. rlwinm $acc13,$s2,`0`,24,31
  1068. lbzx $acc06,$Tbl2,$acc06
  1069. rlwinm $acc14,$s3,`0`,24,31
  1070. lbzx $acc07,$Tbl2,$acc07
  1071. rlwinm $acc15,$s0,`0`,24,31
  1072. lbzx $acc08,$Tbl2,$acc08
  1073. rlwinm $s0,$acc00,24,0,7
  1074. lbzx $acc09,$Tbl2,$acc09
  1075. rlwinm $s1,$acc01,24,0,7
  1076. lbzx $acc10,$Tbl2,$acc10
  1077. rlwinm $s2,$acc02,24,0,7
  1078. lbzx $acc11,$Tbl2,$acc11
  1079. rlwinm $s3,$acc03,24,0,7
  1080. lbzx $acc12,$Tbl2,$acc12
  1081. rlwimi $s0,$acc04,16,8,15
  1082. lbzx $acc13,$Tbl2,$acc13
  1083. rlwimi $s1,$acc05,16,8,15
  1084. lbzx $acc14,$Tbl2,$acc14
  1085. rlwimi $s2,$acc06,16,8,15
  1086. lbzx $acc15,$Tbl2,$acc15
  1087. rlwimi $s3,$acc07,16,8,15
  1088. rlwimi $s0,$acc08,8,16,23
  1089. rlwimi $s1,$acc09,8,16,23
  1090. rlwimi $s2,$acc10,8,16,23
  1091. rlwimi $s3,$acc11,8,16,23
  1092. or $s0,$s0,$acc12
  1093. or $s1,$s1,$acc13
  1094. or $s2,$s2,$acc14
  1095. or $s3,$s3,$acc15
  # XOR in the final round key and return to the caller.
  1096. xor $s0,$s0,$t0
  1097. xor $s1,$s1,$t1
  1098. xor $s2,$s2,$t2
  1099. xor $s3,$s3,$t3
  1100. blr
  # NOTE(review): presumably a traceback table for this routine -- confirm.
  1101. .long 0
  1102. .byte 0,12,0x14,0,0,0,0,0
  # Lppc_AES_decrypt_compact -- decryption of the state in s0-s3 using only
  # the 256-byte table located 2048 bytes past Tbl0. The column mixing is
  # computed arithmetically instead of with word tables.
  # Internal routine: it returns with blr and sets up no stack frame.
  # Inputs: s0-s3 (state), key (pointer to the round keys, with the round
  # count stored as a word at byte offset 240), Tbl0 (table base).
  # Output: s0-s3 hold the transformed state. t0-t3, acc00-acc15, Tbl1,
  # mask80, mask1b, key and the count register are overwritten.
  1103. .align 4
  1104. Lppc_AES_decrypt_compact:
  # Setup: read the round count, point Tbl1 at the byte table, load the first
  # round key into t0-t3 and build the constants 0x80808080 (mask80) and
  # 0x1b1b1b1b (mask1b).
  1105. lwz $acc00,240($key)
  1106. addi $Tbl1,$Tbl0,2048
  1107. lwz $t0,0($key)
  1108. lis $mask80,0x8080
  1109. lwz $t1,4($key)
  1110. lis $mask1b,0x1b1b
  1111. lwz $t2,8($key)
  1112. ori $mask80,$mask80,0x8080
  1113. lwz $t3,12($key)
  1114. ori $mask1b,$mask1b,0x1b1b
  1115. addi $key,$key,16
  1116. ___
  # 64-bit build: copy the low half of each mask into its high half so the
  # masks cover two packed 32-bit columns at once.
  1117. $code.=<<___ if ($SIZE_T==8);
  1118. insrdi $mask80,$mask80,32,0
  1119. insrdi $mask1b,$mask1b,32,0
  1120. ___
  1121. $code.=<<___;
  # The count register gets the full round count (no minus one here): the
  # bdz in the loop body leaves on the final pass before the column mixing.
  1122. mtctr $acc00
  1123. .align 4
  # Per iteration: XOR the current round key into the state, substitute every
  # state byte through the byte table while picking the four bytes of each
  # output word from four different input words, and load the next round key
  # into t0-t3.
  1124. Ldec_compact_loop:
  1125. xor $s0,$s0,$t0
  1126. xor $s1,$s1,$t1
  1127. rlwinm $acc00,$s0,`32-24`,24,31
  1128. xor $s2,$s2,$t2
  1129. rlwinm $acc01,$s1,`32-24`,24,31
  1130. xor $s3,$s3,$t3
  1131. rlwinm $acc02,$s2,`32-24`,24,31
  1132. rlwinm $acc03,$s3,`32-24`,24,31
  1133. rlwinm $acc04,$s3,`32-16`,24,31
  1134. rlwinm $acc05,$s0,`32-16`,24,31
  1135. rlwinm $acc06,$s1,`32-16`,24,31
  1136. rlwinm $acc07,$s2,`32-16`,24,31
  1137. lbzx $acc00,$Tbl1,$acc00
  1138. rlwinm $acc08,$s2,`32-8`,24,31
  1139. lbzx $acc01,$Tbl1,$acc01
  1140. rlwinm $acc09,$s3,`32-8`,24,31
  1141. lbzx $acc02,$Tbl1,$acc02
  1142. rlwinm $acc10,$s0,`32-8`,24,31
  1143. lbzx $acc03,$Tbl1,$acc03
  1144. rlwinm $acc11,$s1,`32-8`,24,31
  1145. lbzx $acc04,$Tbl1,$acc04
  1146. rlwinm $acc12,$s1,`0`,24,31
  1147. lbzx $acc05,$Tbl1,$acc05
  1148. rlwinm $acc13,$s2,`0`,24,31
  1149. lbzx $acc06,$Tbl1,$acc06
  1150. rlwinm $acc14,$s3,`0`,24,31
  1151. lbzx $acc07,$Tbl1,$acc07
  1152. rlwinm $acc15,$s0,`0`,24,31
  1153. lbzx $acc08,$Tbl1,$acc08
  1154. rlwinm $s0,$acc00,24,0,7
  1155. lbzx $acc09,$Tbl1,$acc09
  1156. rlwinm $s1,$acc01,24,0,7
  1157. lbzx $acc10,$Tbl1,$acc10
  1158. rlwinm $s2,$acc02,24,0,7
  1159. lbzx $acc11,$Tbl1,$acc11
  1160. rlwinm $s3,$acc03,24,0,7
  1161. lbzx $acc12,$Tbl1,$acc12
  1162. rlwimi $s0,$acc04,16,8,15
  1163. lbzx $acc13,$Tbl1,$acc13
  1164. rlwimi $s1,$acc05,16,8,15
  1165. lbzx $acc14,$Tbl1,$acc14
  1166. rlwimi $s2,$acc06,16,8,15
  1167. lbzx $acc15,$Tbl1,$acc15
  1168. rlwimi $s3,$acc07,16,8,15
  1169. rlwimi $s0,$acc08,8,16,23
  1170. rlwimi $s1,$acc09,8,16,23
  1171. rlwimi $s2,$acc10,8,16,23
  1172. rlwimi $s3,$acc11,8,16,23
  1173. lwz $t0,0($key)
  1174. or $s0,$s0,$acc12
  1175. lwz $t1,4($key)
  1176. or $s1,$s1,$acc13
  1177. lwz $t2,8($key)
  1178. or $s2,$s2,$acc14
  1179. lwz $t3,12($key)
  1180. or $s3,$s3,$acc15
  1181. addi $key,$key,16
  # Decrement the round counter; when it reaches zero, skip the column mixing
  # and finish with the final round-key XOR at Ldec_compact_done.
  1182. bdz Ldec_compact_done
  1183. ___
  # 64-bit build. The low word of s1 is packed into the high half of s0 and
  # the low word of s3 into the high half of s2, so every step below handles
  # two columns per instruction. The step annotated r2 doubles every byte in
  # parallel: the low seven bits are shifted left by one, and 0x1b is XORed
  # into each byte whose top bit was set. Applying it three times yields
  # r2, r4 and r8. The extrdi instructions then unpack the high halves into
  # the odd-numbered acc registers for the shared 32-bit tail.
  1184. $code.=<<___ if ($SIZE_T==8);
  1185. # vectorized permutation improves decrypt performance by 10%
  1186. insrdi $s0,$s1,32,0
  1187. insrdi $s2,$s3,32,0
  1188. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  1189. and $acc02,$s2,$mask80
  1190. srdi $acc04,$acc00,7 # r1>>7
  1191. srdi $acc06,$acc02,7
  1192. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  1193. andc $acc10,$s2,$mask80
  1194. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  1195. sub $acc02,$acc02,$acc06
  1196. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  1197. add $acc10,$acc10,$acc10
  1198. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1199. and $acc02,$acc02,$mask1b
  1200. xor $acc00,$acc00,$acc08 # r2
  1201. xor $acc02,$acc02,$acc10
  1202. and $acc04,$acc00,$mask80 # r1=r2&0x80808080
  1203. and $acc06,$acc02,$mask80
  1204. srdi $acc08,$acc04,7 # r1>>7
  1205. srdi $acc10,$acc06,7
  1206. andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
  1207. andc $acc14,$acc02,$mask80
  1208. sub $acc04,$acc04,$acc08 # r1-(r1>>7)
  1209. sub $acc06,$acc06,$acc10
  1210. add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
  1211. add $acc14,$acc14,$acc14
  1212. and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1213. and $acc06,$acc06,$mask1b
  1214. xor $acc04,$acc04,$acc12 # r4
  1215. xor $acc06,$acc06,$acc14
  1216. and $acc08,$acc04,$mask80 # r1=r4&0x80808080
  1217. and $acc10,$acc06,$mask80
  1218. srdi $acc12,$acc08,7 # r1>>7
  1219. srdi $acc14,$acc10,7
  1220. sub $acc08,$acc08,$acc12 # r1-(r1>>7)
  1221. sub $acc10,$acc10,$acc14
  1222. andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
  1223. andc $acc14,$acc06,$mask80
  1224. add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
  1225. add $acc14,$acc14,$acc14
  1226. and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1227. and $acc10,$acc10,$mask1b
  1228. xor $acc08,$acc08,$acc12 # r8
  1229. xor $acc10,$acc10,$acc14
  1230. xor $acc00,$acc00,$s0 # r2^r0
  1231. xor $acc02,$acc02,$s2
  1232. xor $acc04,$acc04,$s0 # r4^r0
  1233. xor $acc06,$acc06,$s2
  1234. extrdi $acc01,$acc00,32,0
  1235. extrdi $acc03,$acc02,32,0
  1236. extrdi $acc05,$acc04,32,0
  1237. extrdi $acc07,$acc06,32,0
  1238. extrdi $acc09,$acc08,32,0
  1239. extrdi $acc11,$acc10,32,0
  1240. ___
  # 32-bit build: the same r2 / r4 / r8 computation, one column per register.
  # On exit acc00-acc03 hold r2^r0, acc04-acc07 hold r4^r0 and acc08-acc11
  # hold r8, matching what the 64-bit variant hands to the shared tail.
  1241. $code.=<<___ if ($SIZE_T==4);
  1242. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  1243. and $acc01,$s1,$mask80
  1244. and $acc02,$s2,$mask80
  1245. and $acc03,$s3,$mask80
  1246. srwi $acc04,$acc00,7 # r1>>7
  1247. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  1248. srwi $acc05,$acc01,7
  1249. andc $acc09,$s1,$mask80
  1250. srwi $acc06,$acc02,7
  1251. andc $acc10,$s2,$mask80
  1252. srwi $acc07,$acc03,7
  1253. andc $acc11,$s3,$mask80
  1254. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  1255. sub $acc01,$acc01,$acc05
  1256. sub $acc02,$acc02,$acc06
  1257. sub $acc03,$acc03,$acc07
  1258. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  1259. add $acc09,$acc09,$acc09
  1260. add $acc10,$acc10,$acc10
  1261. add $acc11,$acc11,$acc11
  1262. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1263. and $acc01,$acc01,$mask1b
  1264. and $acc02,$acc02,$mask1b
  1265. and $acc03,$acc03,$mask1b
  1266. xor $acc00,$acc00,$acc08 # r2
  1267. xor $acc01,$acc01,$acc09
  1268. xor $acc02,$acc02,$acc10
  1269. xor $acc03,$acc03,$acc11
  1270. and $acc04,$acc00,$mask80 # r1=r2&0x80808080
  1271. and $acc05,$acc01,$mask80
  1272. and $acc06,$acc02,$mask80
  1273. and $acc07,$acc03,$mask80
  1274. srwi $acc08,$acc04,7 # r1>>7
  1275. andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
  1276. srwi $acc09,$acc05,7
  1277. andc $acc13,$acc01,$mask80
  1278. srwi $acc10,$acc06,7
  1279. andc $acc14,$acc02,$mask80
  1280. srwi $acc11,$acc07,7
  1281. andc $acc15,$acc03,$mask80
  1282. sub $acc04,$acc04,$acc08 # r1-(r1>>7)
  1283. sub $acc05,$acc05,$acc09
  1284. sub $acc06,$acc06,$acc10
  1285. sub $acc07,$acc07,$acc11
  1286. add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
  1287. add $acc13,$acc13,$acc13
  1288. add $acc14,$acc14,$acc14
  1289. add $acc15,$acc15,$acc15
  1290. and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1291. and $acc05,$acc05,$mask1b
  1292. and $acc06,$acc06,$mask1b
  1293. and $acc07,$acc07,$mask1b
  1294. xor $acc04,$acc04,$acc12 # r4
  1295. xor $acc05,$acc05,$acc13
  1296. xor $acc06,$acc06,$acc14
  1297. xor $acc07,$acc07,$acc15
  1298. and $acc08,$acc04,$mask80 # r1=r4&0x80808080
  1299. and $acc09,$acc05,$mask80
  1300. srwi $acc12,$acc08,7 # r1>>7
  1301. and $acc10,$acc06,$mask80
  1302. srwi $acc13,$acc09,7
  1303. and $acc11,$acc07,$mask80
  1304. srwi $acc14,$acc10,7
  1305. sub $acc08,$acc08,$acc12 # r1-(r1>>7)
  1306. srwi $acc15,$acc11,7
  1307. sub $acc09,$acc09,$acc13
  1308. sub $acc10,$acc10,$acc14
  1309. sub $acc11,$acc11,$acc15
  1310. andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
  1311. andc $acc13,$acc05,$mask80
  1312. andc $acc14,$acc06,$mask80
  1313. andc $acc15,$acc07,$mask80
  1314. add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
  1315. add $acc13,$acc13,$acc13
  1316. add $acc14,$acc14,$acc14
  1317. add $acc15,$acc15,$acc15
  1318. and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1319. and $acc09,$acc09,$mask1b
  1320. and $acc10,$acc10,$mask1b
  1321. and $acc11,$acc11,$mask1b
  1322. xor $acc08,$acc08,$acc12 # r8
  1323. xor $acc09,$acc09,$acc13
  1324. xor $acc10,$acc10,$acc14
  1325. xor $acc11,$acc11,$acc15
  1326. xor $acc00,$acc00,$s0 # r2^r0
  1327. xor $acc01,$acc01,$s1
  1328. xor $acc02,$acc02,$s2
  1329. xor $acc03,$acc03,$s3
  1330. xor $acc04,$acc04,$s0 # r4^r0
  1331. xor $acc05,$acc05,$s1
  1332. xor $acc06,$acc06,$s2
  1333. xor $acc07,$acc07,$s3
  1334. ___
  1335. $code.=<<___;
  # Shared tail for both builds: combine r0, r2^r0, r4^r0 and r8 with 32-bit
  # rotations to form the mixed columns in s0-s3, as annotated per line, then
  # start the next round.
  1336. rotrwi $s0,$s0,8 # = ROTATE(r0,8)
  1337. rotrwi $s1,$s1,8
  1338. xor $s0,$s0,$acc00 # ^= r2^r0
  1339. rotrwi $s2,$s2,8
  1340. xor $s1,$s1,$acc01
  1341. rotrwi $s3,$s3,8
  1342. xor $s2,$s2,$acc02
  1343. xor $s3,$s3,$acc03
  1344. xor $acc00,$acc00,$acc08
  1345. xor $acc01,$acc01,$acc09
  1346. xor $acc02,$acc02,$acc10
  1347. xor $acc03,$acc03,$acc11
  1348. xor $s0,$s0,$acc04 # ^= r4^r0
  1349. rotrwi $acc00,$acc00,24
  1350. xor $s1,$s1,$acc05
  1351. rotrwi $acc01,$acc01,24
  1352. xor $s2,$s2,$acc06
  1353. rotrwi $acc02,$acc02,24
  1354. xor $s3,$s3,$acc07
  1355. rotrwi $acc03,$acc03,24
  1356. xor $acc04,$acc04,$acc08
  1357. xor $acc05,$acc05,$acc09
  1358. xor $acc06,$acc06,$acc10
  1359. xor $acc07,$acc07,$acc11
  1360. xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
  1361. rotrwi $acc04,$acc04,16
  1362. xor $s1,$s1,$acc09
  1363. rotrwi $acc05,$acc05,16
  1364. xor $s2,$s2,$acc10
  1365. rotrwi $acc06,$acc06,16
  1366. xor $s3,$s3,$acc11
  1367. rotrwi $acc07,$acc07,16
  1368. xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
  1369. rotrwi $acc08,$acc08,8
  1370. xor $s1,$s1,$acc01
  1371. rotrwi $acc09,$acc09,8
  1372. xor $s2,$s2,$acc02
  1373. rotrwi $acc10,$acc10,8
  1374. xor $s3,$s3,$acc03
  1375. rotrwi $acc11,$acc11,8
  1376. xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
  1377. xor $s1,$s1,$acc05
  1378. xor $s2,$s2,$acc06
  1379. xor $s3,$s3,$acc07
  1380. xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
  1381. xor $s1,$s1,$acc09
  1382. xor $s2,$s2,$acc10
  1383. xor $s3,$s3,$acc11
  1384. b Ldec_compact_loop
  1385. .align 4
  # Final step: XOR the last round key (already loaded into t0-t3 by the loop
  # body) into the state and return to the caller.
  1386. Ldec_compact_done:
  1387. xor $s0,$s0,$t0
  1388. xor $s1,$s1,$t1
  1389. xor $s2,$s2,$t2
  1390. xor $s3,$s3,$t3
  1391. blr
  # NOTE(review): presumably a traceback table for this routine -- confirm.
  1392. .long 0
  1393. .byte 0,12,0x14,0,0,0,0,0
  # Set the size of the .AES_decrypt symbol to the distance from its label to
  # this point, so it also spans the internal decrypt routines placed after
  # the entry point. An identification string and final alignment follow.
  1394. .size .AES_decrypt,.-.AES_decrypt
  1395. .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
  1396. .align 7
  1397. ___
  # Post-process the accumulated text: every backquoted span is evaluated as
  # a Perl expression and replaced by its value (this is how the frame
  # offsets and shift counts written between backquotes become numbers).
  1398. $code =~ s/\`([^\`]*)\`/eval $1/gem;
  # Write the generated text to standard output and fail loudly if the
  # output stream cannot be closed cleanly.
  1399. print $code;
  1400. close STDOUT or die "error closing STDOUT: $!";