aes-ppc.pl 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459
  1. #! /usr/bin/env perl
  2. # Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  10. # project. The module is, however, dual licensed under OpenSSL and
  11. # CRYPTOGAMS licenses depending on where you obtain it. For further
  12. # details see http://www.openssl.org/~appro/cryptogams/.
  13. # ====================================================================
  14. # Needs more work: key setup, CBC routine...
  15. #
  16. # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
  17. # a 128-bit key, which is ~40% better than 64-bit code generated by gcc
  18. # 4.0. But these are not the ones currently used! Their "compact"
  19. # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
  20. # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact runs
  21. # at 1/3 of ppc_AES_decrypt speed.
  22. # February 2010
  23. #
  24. # Rescheduling instructions to favour the Power6 pipeline gave a 10%
  25. # performance improvement on the platform in question (and a marginal
  26. # improvement even on others). It should be noted that Power6 fails
  27. # to process a byte in 18 cycles, needing 23, because it cannot issue
  28. # 4 load instructions in two cycles, only in 3. As a result, non-compact
  29. # block subroutines are 25% slower than one would expect. Compact
  30. # functions scale better, because they have a purely computational part,
  31. # which scales perfectly with clock frequency. To be specific,
  32. # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
  33. # ppc_AES_decrypt_compact operates at 55 (in a 64-bit build).
  34. $flavour = shift; # 1st command-line argument: must contain "64" or "32"; a trailing "le" selects little-endian code
  35. if ($flavour =~ /64/) {
  36. $SIZE_T =8; # size in bytes of one saved-register slot
  37. $LRSAVE =2*$SIZE_T; # offset of the LR save slot, used as $FRAME+$LRSAVE from the new stack pointer
  38. $STU ="stdu"; # store-with-update used to allocate the frame
  39. $POP ="ld";
  40. $PUSH ="std";
  41. } elsif ($flavour =~ /32/) {
  42. $SIZE_T =4;
  43. $LRSAVE =$SIZE_T;
  44. $STU ="stwu";
  45. $POP ="lwz";
  46. $PUSH ="stw";
  47. } else { die "nonsense $flavour"; }
  48. $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; # non-zero (true) only for flavours ending in "le"
  49. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
  50. ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  51. ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  52. die "can't locate ppc-xlate.pl";
  53. open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # "or", not "||": "||" would bind to the command string, which is always true, so the die could never fire
  54. $FRAME=32*$SIZE_T; # stack frame size: 32 register-sized slots
  55. sub _data_word()
  56. { # Appends one ".long w,w" line to $code per argument, so every 32-bit table word is emitted twice (8 bytes per entry); stops at the first undefined argument.
  57. for (my $word=shift; defined($word); $word=shift) { $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word); }
  58. }
  59. $sp="r1";
  60. $toc="r2";
  61. $inp="r3"; # pointer to the 16 input bytes
  62. $out="r4"; # pointer to the 16 output bytes
  63. $key="r5"; # pointer to the key schedule (round-key words; a loop count is read at offset 240)
  64. $Tbl0="r3"; # same register as $inp: LAES_Te/LAES_Td put the table address here
  65. $Tbl1="r6";
  66. $Tbl2="r7";
  67. $Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
  68. $s0="r8"; # $s0-$s3: the four 32-bit words of the cipher state
  69. $s1="r9";
  70. $s2="r10";
  71. $s3="r11";
  72. $t0="r12"; # $t0-$t3: round-key words and scratch
  73. $t1="r0"; # stay away from "r13";
  74. $t2="r14";
  75. $t3="r15";
  76. $acc00="r16"; # $acc00-$acc15: per-byte temporaries (table indices, then the looked-up values)
  77. $acc01="r17";
  78. $acc02="r18";
  79. $acc03="r19";
  80. $acc04="r20";
  81. $acc05="r21";
  82. $acc06="r22";
  83. $acc07="r23";
  84. $acc08="r24";
  85. $acc09="r25";
  86. $acc10="r26";
  87. $acc11="r27";
  88. $acc12="r28";
  89. $acc13="r29";
  90. $acc14="r30";
  91. $acc15="r31";
  92. $mask80=$Tbl2; # Lppc_AES_encrypt_compact keeps 0x80808080 here; it performs no Tbl2 lookups
  93. $mask1b=$Tbl3; # likewise 0x1b1b1b1b, in the register otherwise used as Tbl3
  94. $code.=<<___;
  95. .machine "any"
  96. .text
  97. .align 7
  98. LAES_Te: # leaves the run-time address of the first table entry in Tbl0; clobbers r0, restores LR
  99. mflr r0 # keep the return address: the bcl below overwrites LR
  100. bcl 20,31,\$+4 # branch-and-link to the next instruction, so LR = its address
  101. mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
  102. addi $Tbl0,$Tbl0,`128-8` # LR pointed 8 bytes past LAES_Te; the data starts 128 bytes past it
  103. mtlr r0
  104. blr
  105. .long 0
  106. .byte 0,12,0x14,0,0,0,0,0
  107. .space `64-9*4` # the stub above occupies 9*4 bytes; pad so LAES_Td sits 64 bytes after LAES_Te
  108. LAES_Td: # same scheme as LAES_Te, but Tbl0 gets the address of the second word table
  109. mflr r0
  110. bcl 20,31,\$+4
  111. mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
  112. addi $Tbl0,$Tbl0,`128-64-8+2048+256` # 128-64-8 reaches the data start; 2048+256 skips the first word table and the byte table behind it
  113. mtlr r0
  114. blr
  115. .long 0
  116. .byte 0,12,0x14,0,0,0,0,0
  117. .space `128-64-9*4` # pad so the table data starts 128 bytes after LAES_Te
  118. ___
  119. &_data_word(
  120. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  121. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  122. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  123. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  124. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  125. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  126. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  127. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  128. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  129. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  130. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  131. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  132. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  133. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  134. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  135. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  136. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  137. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  138. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  139. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  140. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  141. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  142. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  143. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  144. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  145. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  146. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  147. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  148. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  149. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  150. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  151. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  152. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  153. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  154. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  155. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  156. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  157. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  158. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  159. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  160. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  161. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  162. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  163. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  164. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  165. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  166. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  167. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  168. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  169. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  170. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  171. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  172. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  173. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  174. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  175. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  176. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  177. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  178. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  179. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  180. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  181. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  182. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  183. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  184. $code.=<<___;
  185. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  186. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  187. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  188. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  189. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  190. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  191. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  192. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  193. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  194. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  195. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  196. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  197. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  198. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  199. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  200. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  201. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  202. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  203. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  204. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  205. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  206. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  207. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  208. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  209. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  210. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  211. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  212. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  213. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  214. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  215. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  216. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  217. ___
  218. &_data_word(
  219. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  220. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  221. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  222. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  223. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  224. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  225. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  226. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  227. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  228. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  229. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  230. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  231. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  232. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  233. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  234. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  235. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  236. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  237. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  238. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  239. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  240. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  241. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  242. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  243. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  244. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  245. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  246. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  247. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  248. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  249. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  250. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  251. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  252. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  253. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  254. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  255. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  256. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  257. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  258. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  259. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  260. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  261. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  262. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  263. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  264. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  265. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  266. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  267. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  268. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  269. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  270. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  271. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  272. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  273. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  274. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  275. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  276. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  277. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  278. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  279. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  280. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  281. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  282. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  283. $code.=<<___;
  284. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  285. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  286. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  287. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  288. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  289. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  290. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  291. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  292. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  293. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  294. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  295. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  296. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  297. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  298. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  299. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  300. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  301. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  302. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  303. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  304. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  305. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  306. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  307. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  308. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  309. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  310. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  311. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  312. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  313. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  314. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  315. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  316. .globl .AES_encrypt
  317. .align 7
  318. .AES_encrypt: # entry point: reads 16 bytes at inp, encrypts them with the key schedule at key, writes 16 bytes at out
  319. $STU $sp,-$FRAME($sp) # allocate the stack frame
  320. mflr r0 # r0 = return address, stored below
  321. $PUSH $out,`$FRAME-$SIZE_T*19`($sp) # spill out: its register is reused as Tbl3/mask1b
  322. $PUSH r14,`$FRAME-$SIZE_T*18`($sp) # save r14-r31, all of which serve as temporaries
  323. $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
  324. $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
  325. $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
  326. $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
  327. $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
  328. $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
  329. $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
  330. $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
  331. $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
  332. $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
  333. $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
  334. $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
  335. $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
  336. $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
  337. $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
  338. $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
  339. $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
  340. $PUSH r0,`$FRAME+$LRSAVE`($sp) # return address goes into the LR slot above this frame
  341. andi. $t0,$inp,3 # low two address bits of inp ...
  342. andi. $t1,$out,3 # ... and of out
  343. or. $t0,$t0,$t1
  344. bne Lenc_unaligned # at least one pointer is not 4-byte aligned
  345. Lenc_unaligned_ok:
  346. ___
  347. $code.=<<___ if (!$LITTLE_ENDIAN);
  348. lwz $s0,0($inp) # big-endian build: the state words load directly
  349. lwz $s1,4($inp)
  350. lwz $s2,8($inp)
  351. lwz $s3,12($inp)
  352. ___
  353. $code.=<<___ if ($LITTLE_ENDIAN);
  354. lwz $t0,0($inp) # little-endian build: load, then byte-reverse each word
  355. lwz $t1,4($inp)
  356. lwz $t2,8($inp)
  357. lwz $t3,12($inp)
  358. rotlwi $s0,$t0,8 # rotlwi plus two rlwimi per word = full byte reversal
  359. rotlwi $s1,$t1,8
  360. rotlwi $s2,$t2,8
  361. rotlwi $s3,$t3,8
  362. rlwimi $s0,$t0,24,0,7
  363. rlwimi $s1,$t1,24,0,7
  364. rlwimi $s2,$t2,24,0,7
  365. rlwimi $s3,$t3,24,0,7
  366. rlwimi $s0,$t0,24,16,23
  367. rlwimi $s1,$t1,24,16,23
  368. rlwimi $s2,$t2,24,16,23
  369. rlwimi $s3,$t3,24,16,23
  370. ___
  371. $code.=<<___;
  372. bl LAES_Te # Tbl0 = table address (replaces inp, which shares the register)
  373. bl Lppc_AES_encrypt_compact # transforms the state in s0-s3 in place
  374. $POP $out,`$FRAME-$SIZE_T*19`($sp) # reload the out pointer spilled in the prologue
  375. ___
  376. $code.=<<___ if ($LITTLE_ENDIAN);
  377. rotlwi $t0,$s0,8 # little-endian build: byte-reverse the result before storing
  378. rotlwi $t1,$s1,8
  379. rotlwi $t2,$s2,8
  380. rotlwi $t3,$s3,8
  381. rlwimi $t0,$s0,24,0,7
  382. rlwimi $t1,$s1,24,0,7
  383. rlwimi $t2,$s2,24,0,7
  384. rlwimi $t3,$s3,24,0,7
  385. rlwimi $t0,$s0,24,16,23
  386. rlwimi $t1,$s1,24,16,23
  387. rlwimi $t2,$s2,24,16,23
  388. rlwimi $t3,$s3,24,16,23
  389. stw $t0,0($out)
  390. stw $t1,4($out)
  391. stw $t2,8($out)
  392. stw $t3,12($out)
  393. ___
  394. $code.=<<___ if (!$LITTLE_ENDIAN);
  395. stw $s0,0($out)
  396. stw $s1,4($out)
  397. stw $s2,8($out)
  398. stw $s3,12($out)
  399. ___
  400. $code.=<<___;
  401. b Lenc_done
  402. Lenc_unaligned:
  403. subfic $t0,$inp,4096 # t0 = 4096 - inp
  404. subfic $t1,$out,4096 # t1 = 4096 - out
  405. andi. $t0,$t0,4096-16 # zero when inp is 0..15 bytes short of a 4096-byte boundary
  406. beq Lenc_xpage # inp is that close to a boundary: go byte by byte
  407. andi. $t1,$t1,4096-16 # same test for out
  408. bne Lenc_unaligned_ok # neither is close: the word-access path is used despite the misalignment
  409. Lenc_xpage:
  410. lbz $acc00,0($inp) # gather the 16 input bytes one at a time ...
  411. lbz $acc01,1($inp)
  412. lbz $acc02,2($inp)
  413. lbz $s0,3($inp)
  414. lbz $acc04,4($inp)
  415. lbz $acc05,5($inp)
  416. lbz $acc06,6($inp)
  417. lbz $s1,7($inp)
  418. lbz $acc08,8($inp)
  419. lbz $acc09,9($inp)
  420. lbz $acc10,10($inp)
  421. insrwi $s0,$acc00,8,0 # ... and pack them so the first byte of each group of four lands in the top 8 bits
  422. lbz $s2,11($inp)
  423. insrwi $s1,$acc04,8,0
  424. lbz $acc12,12($inp)
  425. insrwi $s0,$acc01,8,8
  426. lbz $acc13,13($inp)
  427. insrwi $s1,$acc05,8,8
  428. lbz $acc14,14($inp)
  429. insrwi $s0,$acc02,8,16
  430. lbz $s3,15($inp)
  431. insrwi $s1,$acc06,8,16
  432. insrwi $s2,$acc08,8,0
  433. insrwi $s3,$acc12,8,0
  434. insrwi $s2,$acc09,8,8
  435. insrwi $s3,$acc13,8,8
  436. insrwi $s2,$acc10,8,16
  437. insrwi $s3,$acc14,8,16
  438. bl LAES_Te
  439. bl Lppc_AES_encrypt_compact
  440. $POP $out,`$FRAME-$SIZE_T*19`($sp)
  441. extrwi $acc00,$s0,8,0 # scatter the result byte by byte, top byte of each word first
  442. extrwi $acc01,$s0,8,8
  443. stb $acc00,0($out)
  444. extrwi $acc02,$s0,8,16
  445. stb $acc01,1($out)
  446. stb $acc02,2($out)
  447. extrwi $acc04,$s1,8,0
  448. stb $s0,3($out)
  449. extrwi $acc05,$s1,8,8
  450. stb $acc04,4($out)
  451. extrwi $acc06,$s1,8,16
  452. stb $acc05,5($out)
  453. stb $acc06,6($out)
  454. extrwi $acc08,$s2,8,0
  455. stb $s1,7($out)
  456. extrwi $acc09,$s2,8,8
  457. stb $acc08,8($out)
  458. extrwi $acc10,$s2,8,16
  459. stb $acc09,9($out)
  460. stb $acc10,10($out)
  461. extrwi $acc12,$s3,8,0
  462. stb $s2,11($out)
  463. extrwi $acc13,$s3,8,8
  464. stb $acc12,12($out)
  465. extrwi $acc14,$s3,8,16
  466. stb $acc13,13($out)
  467. stb $acc14,14($out)
  468. stb $s3,15($out)
  469. Lenc_done:
  470. $POP r0,`$FRAME+$LRSAVE`($sp) # fetch the saved return address
  471. $POP r14,`$FRAME-$SIZE_T*18`($sp) # restore r14-r31
  472. $POP r15,`$FRAME-$SIZE_T*17`($sp)
  473. $POP r16,`$FRAME-$SIZE_T*16`($sp)
  474. $POP r17,`$FRAME-$SIZE_T*15`($sp)
  475. $POP r18,`$FRAME-$SIZE_T*14`($sp)
  476. $POP r19,`$FRAME-$SIZE_T*13`($sp)
  477. $POP r20,`$FRAME-$SIZE_T*12`($sp)
  478. $POP r21,`$FRAME-$SIZE_T*11`($sp)
  479. $POP r22,`$FRAME-$SIZE_T*10`($sp)
  480. $POP r23,`$FRAME-$SIZE_T*9`($sp)
  481. $POP r24,`$FRAME-$SIZE_T*8`($sp)
  482. $POP r25,`$FRAME-$SIZE_T*7`($sp)
  483. $POP r26,`$FRAME-$SIZE_T*6`($sp)
  484. $POP r27,`$FRAME-$SIZE_T*5`($sp)
  485. $POP r28,`$FRAME-$SIZE_T*4`($sp)
  486. $POP r29,`$FRAME-$SIZE_T*3`($sp)
  487. $POP r30,`$FRAME-$SIZE_T*2`($sp)
  488. $POP r31,`$FRAME-$SIZE_T*1`($sp)
  489. mtlr r0
  490. addi $sp,$sp,$FRAME # release the stack frame
  491. blr
  492. .long 0
  493. .byte 0,12,4,1,0x80,18,3,0
  494. .long 0
  495. .align 5
  496. Lppc_AES_encrypt: # word-table variant: state in s0-s3 (in and out), key schedule at key, Tbl0 = table base; .AES_encrypt above calls the compact variant instead
  497. lwz $acc00,240($key) # word at key+240 sets the loop count (presumably the number of rounds)
  498. addi $Tbl1,$Tbl0,3 # Tbl1/Tbl2/Tbl3 = Tbl0+3/+2/+1: every 8-byte entry holds its word twice, so these bases yield byte-rotated words
  499. lwz $t0,0($key)
  500. addi $Tbl2,$Tbl0,2
  501. lwz $t1,4($key)
  502. addi $Tbl3,$Tbl0,1
  503. lwz $t2,8($key)
  504. addi $acc00,$acc00,-1 # the loop runs count-1 times; the last round is done separately after it
  505. lwz $t3,12($key)
  506. addi $key,$key,16
  507. xor $s0,$s0,$t0 # state ^= first four key words
  508. xor $s1,$s1,$t1
  509. xor $s2,$s2,$t2
  510. xor $s3,$s3,$t3
  511. mtctr $acc00
  512. .align 4
  513. Lenc_loop:
  514. rlwinm $acc00,$s0,`32-24+3`,21,28 # (top byte of s0)*8 = byte offset of its 8-byte table entry
  515. rlwinm $acc01,$s1,`32-24+3`,21,28
  516. rlwinm $acc02,$s2,`32-24+3`,21,28
  517. rlwinm $acc03,$s3,`32-24+3`,21,28
  518. lwz $t0,0($key) # next four key words, interleaved with the index extraction
  519. rlwinm $acc04,$s1,`32-16+3`,21,28
  520. lwz $t1,4($key)
  521. rlwinm $acc05,$s2,`32-16+3`,21,28
  522. lwz $t2,8($key)
  523. rlwinm $acc06,$s3,`32-16+3`,21,28
  524. lwz $t3,12($key)
  525. rlwinm $acc07,$s0,`32-16+3`,21,28
  526. lwzx $acc00,$Tbl0,$acc00
  527. rlwinm $acc08,$s2,`32-8+3`,21,28
  528. lwzx $acc01,$Tbl0,$acc01
  529. rlwinm $acc09,$s3,`32-8+3`,21,28
  530. lwzx $acc02,$Tbl0,$acc02
  531. rlwinm $acc10,$s0,`32-8+3`,21,28
  532. lwzx $acc03,$Tbl0,$acc03
  533. rlwinm $acc11,$s1,`32-8+3`,21,28
  534. lwzx $acc04,$Tbl1,$acc04
  535. rlwinm $acc12,$s3,`0+3`,21,28
  536. lwzx $acc05,$Tbl1,$acc05
  537. rlwinm $acc13,$s0,`0+3`,21,28
  538. lwzx $acc06,$Tbl1,$acc06
  539. rlwinm $acc14,$s1,`0+3`,21,28
  540. lwzx $acc07,$Tbl1,$acc07
  541. rlwinm $acc15,$s2,`0+3`,21,28
  542. lwzx $acc08,$Tbl2,$acc08
  543. xor $t0,$t0,$acc00
  544. lwzx $acc09,$Tbl2,$acc09
  545. xor $t1,$t1,$acc01
  546. lwzx $acc10,$Tbl2,$acc10
  547. xor $t2,$t2,$acc02
  548. lwzx $acc11,$Tbl2,$acc11
  549. xor $t3,$t3,$acc03
  550. lwzx $acc12,$Tbl3,$acc12
  551. xor $t0,$t0,$acc04
  552. lwzx $acc13,$Tbl3,$acc13
  553. xor $t1,$t1,$acc05
  554. lwzx $acc14,$Tbl3,$acc14
  555. xor $t2,$t2,$acc06
  556. lwzx $acc15,$Tbl3,$acc15
  557. xor $t3,$t3,$acc07
  558. xor $t0,$t0,$acc08
  559. xor $t1,$t1,$acc09
  560. xor $t2,$t2,$acc10
  561. xor $t3,$t3,$acc11
  562. xor $s0,$t0,$acc12 # new s0 = key word ^ four table words, one per source byte
  563. xor $s1,$t1,$acc13
  564. xor $s2,$t2,$acc14
  565. xor $s3,$t3,$acc15
  566. addi $key,$key,16
  567. bdnz Lenc_loop
  568. addi $Tbl2,$Tbl0,2048 # Tbl2 = byte table located right after the 2048-byte word table
  569. nop
  570. lwz $t0,0($key)
  571. rlwinm $acc00,$s0,`32-24`,24,31 # last round: plain byte indices (no scaling by 8) into the byte table
  572. lwz $t1,4($key)
  573. rlwinm $acc01,$s1,`32-24`,24,31
  574. lwz $t2,8($key)
  575. rlwinm $acc02,$s2,`32-24`,24,31
  576. lwz $t3,12($key)
  577. rlwinm $acc03,$s3,`32-24`,24,31
  578. lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
  579. rlwinm $acc04,$s1,`32-16`,24,31
  580. lwz $acc09,`2048+32`($Tbl0)
  581. rlwinm $acc05,$s2,`32-16`,24,31
  582. lwz $acc10,`2048+64`($Tbl0)
  583. rlwinm $acc06,$s3,`32-16`,24,31
  584. lwz $acc11,`2048+96`($Tbl0)
  585. rlwinm $acc07,$s0,`32-16`,24,31
  586. lwz $acc12,`2048+128`($Tbl0)
  587. rlwinm $acc08,$s2,`32-8`,24,31 # from here on the prefetched values in acc08-acc15 are overwritten unused
  588. lwz $acc13,`2048+160`($Tbl0)
  589. rlwinm $acc09,$s3,`32-8`,24,31
  590. lwz $acc14,`2048+192`($Tbl0)
  591. rlwinm $acc10,$s0,`32-8`,24,31
  592. lwz $acc15,`2048+224`($Tbl0)
  593. rlwinm $acc11,$s1,`32-8`,24,31
  594. lbzx $acc00,$Tbl2,$acc00
  595. rlwinm $acc12,$s3,`0`,24,31
  596. lbzx $acc01,$Tbl2,$acc01
  597. rlwinm $acc13,$s0,`0`,24,31
  598. lbzx $acc02,$Tbl2,$acc02
  599. rlwinm $acc14,$s1,`0`,24,31
  600. lbzx $acc03,$Tbl2,$acc03
  601. rlwinm $acc15,$s2,`0`,24,31
  602. lbzx $acc04,$Tbl2,$acc04
  603. rlwinm $s0,$acc00,24,0,7 # rebuild each state word from four looked-up bytes
  604. lbzx $acc05,$Tbl2,$acc05
  605. rlwinm $s1,$acc01,24,0,7
  606. lbzx $acc06,$Tbl2,$acc06
  607. rlwinm $s2,$acc02,24,0,7
  608. lbzx $acc07,$Tbl2,$acc07
  609. rlwinm $s3,$acc03,24,0,7
  610. lbzx $acc08,$Tbl2,$acc08
  611. rlwimi $s0,$acc04,16,8,15
  612. lbzx $acc09,$Tbl2,$acc09
  613. rlwimi $s1,$acc05,16,8,15
  614. lbzx $acc10,$Tbl2,$acc10
  615. rlwimi $s2,$acc06,16,8,15
  616. lbzx $acc11,$Tbl2,$acc11
  617. rlwimi $s3,$acc07,16,8,15
  618. lbzx $acc12,$Tbl2,$acc12
  619. rlwimi $s0,$acc08,8,16,23
  620. lbzx $acc13,$Tbl2,$acc13
  621. rlwimi $s1,$acc09,8,16,23
  622. lbzx $acc14,$Tbl2,$acc14
  623. rlwimi $s2,$acc10,8,16,23
  624. lbzx $acc15,$Tbl2,$acc15
  625. rlwimi $s3,$acc11,8,16,23
  626. or $s0,$s0,$acc12
  627. or $s1,$s1,$acc13
  628. or $s2,$s2,$acc14
  629. or $s3,$s3,$acc15
  630. xor $s0,$s0,$t0 # final key addition
  631. xor $s1,$s1,$t1
  632. xor $s2,$s2,$t2
  633. xor $s3,$s3,$t3
  634. blr
  635. .long 0
  636. .byte 0,12,0x14,0,0,0,0,0
  637. .align 4
  638. Lppc_AES_encrypt_compact: # variant called by .AES_encrypt: only the 256-byte table is consulted; state in s0-s3 (in and out), key schedule at key, Tbl0 = table base
  639. lwz $acc00,240($key) # word at key+240 sets the loop count (presumably the number of rounds)
  640. addi $Tbl1,$Tbl0,2048 # Tbl1 = byte table located right after the 2048-byte word table
  641. lwz $t0,0($key)
  642. lis $mask80,0x8080 # mask80 = 0x80808080 once the ori below fills the low half
  643. lwz $t1,4($key)
  644. lis $mask1b,0x1b1b # mask1b = 0x1b1b1b1b once the ori below fills the low half
  645. lwz $t2,8($key)
  646. ori $mask80,$mask80,0x8080
  647. lwz $t3,12($key)
  648. ori $mask1b,$mask1b,0x1b1b
  649. addi $key,$key,16
  650. mtctr $acc00 # one loop pass per count; the final pass leaves through the bdz
  651. .align 4
  652. Lenc_compact_loop:
  653. xor $s0,$s0,$t0 # add the current round key
  654. xor $s1,$s1,$t1
  655. rlwinm $acc00,$s0,`32-24`,24,31 # isolate the state bytes as table indices
  656. xor $s2,$s2,$t2
  657. rlwinm $acc01,$s1,`32-24`,24,31
  658. xor $s3,$s3,$t3
  659. rlwinm $acc02,$s2,`32-24`,24,31
  660. rlwinm $acc03,$s3,`32-24`,24,31
  661. rlwinm $acc04,$s1,`32-16`,24,31
  662. rlwinm $acc05,$s2,`32-16`,24,31
  663. rlwinm $acc06,$s3,`32-16`,24,31
  664. rlwinm $acc07,$s0,`32-16`,24,31
  665. lbzx $acc00,$Tbl1,$acc00 # byte substitution through the table
  666. rlwinm $acc08,$s2,`32-8`,24,31
  667. lbzx $acc01,$Tbl1,$acc01
  668. rlwinm $acc09,$s3,`32-8`,24,31
  669. lbzx $acc02,$Tbl1,$acc02
  670. rlwinm $acc10,$s0,`32-8`,24,31
  671. lbzx $acc03,$Tbl1,$acc03
  672. rlwinm $acc11,$s1,`32-8`,24,31
  673. lbzx $acc04,$Tbl1,$acc04
  674. rlwinm $acc12,$s3,`0`,24,31
  675. lbzx $acc05,$Tbl1,$acc05
  676. rlwinm $acc13,$s0,`0`,24,31
  677. lbzx $acc06,$Tbl1,$acc06
  678. rlwinm $acc14,$s1,`0`,24,31
  679. lbzx $acc07,$Tbl1,$acc07
  680. rlwinm $acc15,$s2,`0`,24,31
  681. lbzx $acc08,$Tbl1,$acc08
  682. rlwinm $s0,$acc00,24,0,7 # reassemble the words: new s0 takes its four bytes from old s0,s1,s2,s3 in turn, and so on cyclically
  683. lbzx $acc09,$Tbl1,$acc09
  684. rlwinm $s1,$acc01,24,0,7
  685. lbzx $acc10,$Tbl1,$acc10
  686. rlwinm $s2,$acc02,24,0,7
  687. lbzx $acc11,$Tbl1,$acc11
  688. rlwinm $s3,$acc03,24,0,7
  689. lbzx $acc12,$Tbl1,$acc12
  690. rlwimi $s0,$acc04,16,8,15
  691. lbzx $acc13,$Tbl1,$acc13
  692. rlwimi $s1,$acc05,16,8,15
  693. lbzx $acc14,$Tbl1,$acc14
  694. rlwimi $s2,$acc06,16,8,15
  695. lbzx $acc15,$Tbl1,$acc15
  696. rlwimi $s3,$acc07,16,8,15
  697. rlwimi $s0,$acc08,8,16,23
  698. rlwimi $s1,$acc09,8,16,23
  699. rlwimi $s2,$acc10,8,16,23
  700. rlwimi $s3,$acc11,8,16,23
  701. lwz $t0,0($key) # fetch the next round key
  702. or $s0,$s0,$acc12
  703. lwz $t1,4($key)
  704. or $s1,$s1,$acc13
  705. lwz $t2,8($key)
  706. or $s2,$s2,$acc14
  707. lwz $t3,12($key)
  708. or $s3,$s3,$acc15
  709. addi $key,$key,16
  710. bdz Lenc_compact_done # last pass: skip the column mixing below, only the final key addition remains
  711. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  712. and $acc01,$s1,$mask80
  713. and $acc02,$s2,$mask80
  714. and $acc03,$s3,$mask80
  715. srwi $acc04,$acc00,7 # r1>>7
  716. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  717. srwi $acc05,$acc01,7
  718. andc $acc09,$s1,$mask80
  719. srwi $acc06,$acc02,7
  720. andc $acc10,$s2,$mask80
  721. srwi $acc07,$acc03,7
  722. andc $acc11,$s3,$mask80
  723. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  724. sub $acc01,$acc01,$acc05
  725. sub $acc02,$acc02,$acc06
  726. sub $acc03,$acc03,$acc07
  727. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  728. add $acc09,$acc09,$acc09
  729. add $acc10,$acc10,$acc10
  730. add $acc11,$acc11,$acc11
  731. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  732. and $acc01,$acc01,$mask1b
  733. and $acc02,$acc02,$mask1b
  734. and $acc03,$acc03,$mask1b
  735. xor $acc00,$acc00,$acc08 # r2 = every byte of r0 shifted left by one, xor 0x1b where its top bit was set
  736. xor $acc01,$acc01,$acc09
  737. rotlwi $acc12,$s0,16 # ROTATE(r0,16)
  738. xor $acc02,$acc02,$acc10
  739. rotlwi $acc13,$s1,16
  740. xor $acc03,$acc03,$acc11
  741. rotlwi $acc14,$s2,16
  742. xor $s0,$s0,$acc00 # r0^r2
  743. rotlwi $acc15,$s3,16
  744. xor $s1,$s1,$acc01
  745. rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
  746. xor $s2,$s2,$acc02
  747. rotrwi $s1,$s1,24
  748. xor $s3,$s3,$acc03
  749. rotrwi $s2,$s2,24
  750. xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
  751. rotrwi $s3,$s3,24
  752. xor $s1,$s1,$acc01
  753. xor $s2,$s2,$acc02
  754. xor $s3,$s3,$acc03
  755. rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
  756. xor $s0,$s0,$acc12 #
  757. rotlwi $acc09,$acc13,8
  758. xor $s1,$s1,$acc13
  759. rotlwi $acc10,$acc14,8
  760. xor $s2,$s2,$acc14
  761. rotlwi $acc11,$acc15,8
  762. xor $s3,$s3,$acc15
  763. xor $s0,$s0,$acc08 #
  764. xor $s1,$s1,$acc09
  765. xor $s2,$s2,$acc10
  766. xor $s3,$s3,$acc11
  767. b Lenc_compact_loop
  768. .align 4
  769. Lenc_compact_done:
  770. xor $s0,$s0,$t0 # final key addition
  771. xor $s1,$s1,$t1
  772. xor $s2,$s2,$t2
  773. xor $s3,$s3,$t3
  774. blr
  775. .long 0
  776. .byte 0,12,0x14,0,0,0,0,0
  777. .size .AES_encrypt,.-.AES_encrypt
  # ----------------------------------------------------------------------
  # .AES_decrypt -- exported entry point that decrypts one 16-byte block.
  # Loads 16 bytes from inp into the four state words s0-s3, runs
  # Lppc_AES_decrypt_compact on them and stores the 16 result bytes at out.
  # This wrapper never touches key itself, it is consumed by the core.
  # NOTE(review): which argument registers inp, out and key map to is
  # defined earlier in the file - confirm there.
  # ----------------------------------------------------------------------
  778. .globl .AES_decrypt
  779. .align 7
  780. .AES_decrypt:
  # Prologue: allocate the frame, then spill the out pointer, r14-r31 and
  # the link register. All of them are reloaded further down.
  781. $STU $sp,-$FRAME($sp)
  782. mflr r0
  783. $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
  784. $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
  785. $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
  786. $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
  787. $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
  788. $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
  789. $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
  790. $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
  791. $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
  792. $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
  793. $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
  794. $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
  795. $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
  796. $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
  797. $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
  798. $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
  799. $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
  800. $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
  801. $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
  802. $PUSH r0,`$FRAME+$LRSAVE`($sp)
  # Fall through to the word-access path only when the low two bits of
  # both inp and out are zero, otherwise go to Ldec_unaligned.
  803. andi. $t0,$inp,3
  804. andi. $t1,$out,3
  805. or. $t0,$t0,$t1
  806. bne Ldec_unaligned
  807. Ldec_unaligned_ok:
  808. ___
  # Big-endian build: the four state words are loaded as they are.
  809. $code.=<<___ if (!$LITTLE_ENDIAN);
  810. lwz $s0,0($inp)
  811. lwz $s1,4($inp)
  812. lwz $s2,8($inp)
  813. lwz $s3,12($inp)
  814. ___
  # Little-endian build: load each word, then reverse its byte order
  # (rotate left by 8, then overwrite the top byte and the third byte with
  # the matching bytes of the word rotated left by 24).
  815. $code.=<<___ if ($LITTLE_ENDIAN);
  816. lwz $t0,0($inp)
  817. lwz $t1,4($inp)
  818. lwz $t2,8($inp)
  819. lwz $t3,12($inp)
  820. rotlwi $s0,$t0,8
  821. rotlwi $s1,$t1,8
  822. rotlwi $s2,$t2,8
  823. rotlwi $s3,$t3,8
  824. rlwimi $s0,$t0,24,0,7
  825. rlwimi $s1,$t1,24,0,7
  826. rlwimi $s2,$t2,24,0,7
  827. rlwimi $s3,$t3,24,0,7
  828. rlwimi $s0,$t0,24,16,23
  829. rlwimi $s1,$t1,24,16,23
  830. rlwimi $s2,$t2,24,16,23
  831. rlwimi $s3,$t3,24,16,23
  832. ___
  833. $code.=<<___;
  # LAES_Td is defined outside this section. Presumably it sets up the
  # decryption table base used by the core routine - confirm there.
  834. bl LAES_Td
  835. bl Lppc_AES_decrypt_compact
  # Reload the out pointer that was spilled in the prologue.
  836. $POP $out,`$FRAME-$SIZE_T*19`($sp)
  837. ___
  # Little-endian build: reverse the bytes of every result word (same
  # rotate and insert sequence as on input) before the word stores.
  838. $code.=<<___ if ($LITTLE_ENDIAN);
  839. rotlwi $t0,$s0,8
  840. rotlwi $t1,$s1,8
  841. rotlwi $t2,$s2,8
  842. rotlwi $t3,$s3,8
  843. rlwimi $t0,$s0,24,0,7
  844. rlwimi $t1,$s1,24,0,7
  845. rlwimi $t2,$s2,24,0,7
  846. rlwimi $t3,$s3,24,0,7
  847. rlwimi $t0,$s0,24,16,23
  848. rlwimi $t1,$s1,24,16,23
  849. rlwimi $t2,$s2,24,16,23
  850. rlwimi $t3,$s3,24,16,23
  851. stw $t0,0($out)
  852. stw $t1,4($out)
  853. stw $t2,8($out)
  854. stw $t3,12($out)
  855. ___
  # Big-endian build: the result words are stored as they are.
  856. $code.=<<___ if (!$LITTLE_ENDIAN);
  857. stw $s0,0($out)
  858. stw $s1,4($out)
  859. stw $s2,8($out)
  860. stw $s3,12($out)
  861. ___
  862. $code.=<<___;
  863. b Ldec_done
  864. Ldec_unaligned:
  # At least one pointer is not word aligned. For each pointer compute
  # 4096 minus the pointer and test bits 4-11 of the result. All zero
  # means the pointer is 4096-byte aligned or fewer than 16 bytes below a
  # 4096-byte boundary, and then the byte-wise path at Ldec_xpage is used.
  # If neither pointer is in that situation the word path is taken anyway.
  # NOTE(review): presumably this keeps misaligned word accesses from
  # straddling a page - confirm.
  865. subfic $t0,$inp,4096
  866. subfic $t1,$out,4096
  867. andi. $t0,$t0,4096-16
  868. beq Ldec_xpage
  869. andi. $t1,$t1,4096-16
  870. bne Ldec_unaligned_ok
  871. Ldec_xpage:
  # Byte-wise load: read the 16 input bytes one at a time and assemble the
  # four state words with the first byte of each group in the top byte.
  # The fourth byte of each group is loaded straight into s0-s3, the other
  # three are inserted with insrwi.
  872. lbz $acc00,0($inp)
  873. lbz $acc01,1($inp)
  874. lbz $acc02,2($inp)
  875. lbz $s0,3($inp)
  876. lbz $acc04,4($inp)
  877. lbz $acc05,5($inp)
  878. lbz $acc06,6($inp)
  879. lbz $s1,7($inp)
  880. lbz $acc08,8($inp)
  881. lbz $acc09,9($inp)
  882. lbz $acc10,10($inp)
  883. insrwi $s0,$acc00,8,0
  884. lbz $s2,11($inp)
  885. insrwi $s1,$acc04,8,0
  886. lbz $acc12,12($inp)
  887. insrwi $s0,$acc01,8,8
  888. lbz $acc13,13($inp)
  889. insrwi $s1,$acc05,8,8
  890. lbz $acc14,14($inp)
  891. insrwi $s0,$acc02,8,16
  892. lbz $s3,15($inp)
  893. insrwi $s1,$acc06,8,16
  894. insrwi $s2,$acc08,8,0
  895. insrwi $s3,$acc12,8,0
  896. insrwi $s2,$acc09,8,8
  897. insrwi $s3,$acc13,8,8
  898. insrwi $s2,$acc10,8,16
  899. insrwi $s3,$acc14,8,16
  # Same two calls as on the word path, then reload the spilled out pointer.
  900. bl LAES_Td
  901. bl Lppc_AES_decrypt_compact
  902. $POP $out,`$FRAME-$SIZE_T*19`($sp)
  # Byte-wise store: extract the three upper bytes of every result word
  # with extrwi, the low byte is stored directly from s0-s3 by stb.
  903. extrwi $acc00,$s0,8,0
  904. extrwi $acc01,$s0,8,8
  905. stb $acc00,0($out)
  906. extrwi $acc02,$s0,8,16
  907. stb $acc01,1($out)
  908. stb $acc02,2($out)
  909. extrwi $acc04,$s1,8,0
  910. stb $s0,3($out)
  911. extrwi $acc05,$s1,8,8
  912. stb $acc04,4($out)
  913. extrwi $acc06,$s1,8,16
  914. stb $acc05,5($out)
  915. stb $acc06,6($out)
  916. extrwi $acc08,$s2,8,0
  917. stb $s1,7($out)
  918. extrwi $acc09,$s2,8,8
  919. stb $acc08,8($out)
  920. extrwi $acc10,$s2,8,16
  921. stb $acc09,9($out)
  922. stb $acc10,10($out)
  923. extrwi $acc12,$s3,8,0
  924. stb $s2,11($out)
  925. extrwi $acc13,$s3,8,8
  926. stb $acc12,12($out)
  927. extrwi $acc14,$s3,8,16
  928. stb $acc13,13($out)
  929. stb $acc14,14($out)
  930. stb $s3,15($out)
  931. Ldec_done:
  # Epilogue: reload the saved link register value and r14-r31, release
  # the frame and return.
  932. $POP r0,`$FRAME+$LRSAVE`($sp)
  933. $POP r14,`$FRAME-$SIZE_T*18`($sp)
  934. $POP r15,`$FRAME-$SIZE_T*17`($sp)
  935. $POP r16,`$FRAME-$SIZE_T*16`($sp)
  936. $POP r17,`$FRAME-$SIZE_T*15`($sp)
  937. $POP r18,`$FRAME-$SIZE_T*14`($sp)
  938. $POP r19,`$FRAME-$SIZE_T*13`($sp)
  939. $POP r20,`$FRAME-$SIZE_T*12`($sp)
  940. $POP r21,`$FRAME-$SIZE_T*11`($sp)
  941. $POP r22,`$FRAME-$SIZE_T*10`($sp)
  942. $POP r23,`$FRAME-$SIZE_T*9`($sp)
  943. $POP r24,`$FRAME-$SIZE_T*8`($sp)
  944. $POP r25,`$FRAME-$SIZE_T*7`($sp)
  945. $POP r26,`$FRAME-$SIZE_T*6`($sp)
  946. $POP r27,`$FRAME-$SIZE_T*5`($sp)
  947. $POP r28,`$FRAME-$SIZE_T*4`($sp)
  948. $POP r29,`$FRAME-$SIZE_T*3`($sp)
  949. $POP r30,`$FRAME-$SIZE_T*2`($sp)
  950. $POP r31,`$FRAME-$SIZE_T*1`($sp)
  951. mtlr r0
  952. addi $sp,$sp,$FRAME
  953. blr
  # NOTE(review): the data words below look like a function traceback tag.
  # The 18 matches the number of registers r14-r31 saved above - confirm
  # the field meanings against the target ABI.
  954. .long 0
  955. .byte 0,12,4,1,0x80,18,3,0
  956. .long 0
  # ----------------------------------------------------------------------
  # Lppc_AES_decrypt -- table-driven decryption core (local routine).
  # In:  s0-s3 = state words, key = round-key pointer with the round
  #      count stored at byte offset 240, Tbl0 = table base.
  # Out: s0-s3 = result words.
  # Overwrites acc00-acc15, t0-t3, Tbl1-Tbl3, key and the count register.
  # Uses no stack and returns with blr.
  # The AES_decrypt wrapper in this file calls Lppc_AES_decrypt_compact,
  # not this routine.
  # ----------------------------------------------------------------------
  957. .align 5
  958. Lppc_AES_decrypt:
  # Setup: Tbl1, Tbl2 and Tbl3 are Tbl0 plus 3, 2 and 1 bytes. The first
  # round key is xored into the state and the loop count is the round
  # count minus one.
  # NOTE(review): the byte-offset bases presumably yield byte-rotated
  # views of the table words - depends on the table layout defined
  # outside this section, confirm there.
  959. lwz $acc00,240($key)
  960. addi $Tbl1,$Tbl0,3
  961. lwz $t0,0($key)
  962. addi $Tbl2,$Tbl0,2
  963. lwz $t1,4($key)
  964. addi $Tbl3,$Tbl0,1
  965. lwz $t2,8($key)
  966. addi $acc00,$acc00,-1
  967. lwz $t3,12($key)
  968. addi $key,$key,16
  969. xor $s0,$s0,$t0
  970. xor $s1,$s1,$t1
  971. xor $s2,$s2,$t2
  972. xor $s3,$s3,$t3
  973. mtctr $acc00
  974. .align 4
  975. Ldec_loop:
  # One full round. Each rlwinm isolates one state byte and scales it by 8
  # (extra rotate of 3, mask bits 21-28) to form a table offset.
  # Output word i takes its top byte from word i, the next byte from word
  # i-1, the next from word i-2 and the low byte from word i-3 (indices
  # modulo 4). The round key is loaded into t0-t3 in between.
  976. rlwinm $acc00,$s0,`32-24+3`,21,28
  977. rlwinm $acc01,$s1,`32-24+3`,21,28
  978. rlwinm $acc02,$s2,`32-24+3`,21,28
  979. rlwinm $acc03,$s3,`32-24+3`,21,28
  980. lwz $t0,0($key)
  981. rlwinm $acc04,$s3,`32-16+3`,21,28
  982. lwz $t1,4($key)
  983. rlwinm $acc05,$s0,`32-16+3`,21,28
  984. lwz $t2,8($key)
  985. rlwinm $acc06,$s1,`32-16+3`,21,28
  986. lwz $t3,12($key)
  987. rlwinm $acc07,$s2,`32-16+3`,21,28
  988. lwzx $acc00,$Tbl0,$acc00
  989. rlwinm $acc08,$s2,`32-8+3`,21,28
  990. lwzx $acc01,$Tbl0,$acc01
  991. rlwinm $acc09,$s3,`32-8+3`,21,28
  992. lwzx $acc02,$Tbl0,$acc02
  993. rlwinm $acc10,$s0,`32-8+3`,21,28
  994. lwzx $acc03,$Tbl0,$acc03
  995. rlwinm $acc11,$s1,`32-8+3`,21,28
  996. lwzx $acc04,$Tbl1,$acc04
  997. rlwinm $acc12,$s1,`0+3`,21,28
  998. lwzx $acc05,$Tbl1,$acc05
  999. rlwinm $acc13,$s2,`0+3`,21,28
  1000. lwzx $acc06,$Tbl1,$acc06
  1001. rlwinm $acc14,$s3,`0+3`,21,28
  1002. lwzx $acc07,$Tbl1,$acc07
  1003. rlwinm $acc15,$s0,`0+3`,21,28
  1004. lwzx $acc08,$Tbl2,$acc08
  # Fold the four table words of every column into the round key, the
  # last xor of each chain writes the new state word.
  1005. xor $t0,$t0,$acc00
  1006. lwzx $acc09,$Tbl2,$acc09
  1007. xor $t1,$t1,$acc01
  1008. lwzx $acc10,$Tbl2,$acc10
  1009. xor $t2,$t2,$acc02
  1010. lwzx $acc11,$Tbl2,$acc11
  1011. xor $t3,$t3,$acc03
  1012. lwzx $acc12,$Tbl3,$acc12
  1013. xor $t0,$t0,$acc04
  1014. lwzx $acc13,$Tbl3,$acc13
  1015. xor $t1,$t1,$acc05
  1016. lwzx $acc14,$Tbl3,$acc14
  1017. xor $t2,$t2,$acc06
  1018. lwzx $acc15,$Tbl3,$acc15
  1019. xor $t3,$t3,$acc07
  1020. xor $t0,$t0,$acc08
  1021. xor $t1,$t1,$acc09
  1022. xor $t2,$t2,$acc10
  1023. xor $t3,$t3,$acc11
  1024. xor $s0,$t0,$acc12
  1025. xor $s1,$t1,$acc13
  1026. xor $s2,$t2,$acc14
  1027. xor $s3,$t3,$acc15
  1028. addi $key,$key,16
  1029. bdnz Ldec_loop
  # Last round: byte lookups (lbzx) in the table that starts 2048 bytes
  # past Tbl0, using unscaled byte indices and the same byte selection as
  # in the loop. The last round key is loaded into t0-t3 on the way.
  1030. addi $Tbl2,$Tbl0,2048
  1031. nop
  1032. lwz $t0,0($key)
  1033. rlwinm $acc00,$s0,`32-24`,24,31
  1034. lwz $t1,4($key)
  1035. rlwinm $acc01,$s1,`32-24`,24,31
  1036. lwz $t2,8($key)
  1037. rlwinm $acc02,$s2,`32-24`,24,31
  1038. lwz $t3,12($key)
  1039. rlwinm $acc03,$s3,`32-24`,24,31
  # The eight lwz into acc08-acc15 read the byte table at 32-byte steps.
  # Their results are overwritten by the rlwinm instructions further down
  # before any use, so only the memory touch matters (prefetch).
  1040. lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
  1041. rlwinm $acc04,$s3,`32-16`,24,31
  1042. lwz $acc09,`2048+32`($Tbl0)
  1043. rlwinm $acc05,$s0,`32-16`,24,31
  1044. lwz $acc10,`2048+64`($Tbl0)
  1045. lbzx $acc00,$Tbl2,$acc00
  1046. lwz $acc11,`2048+96`($Tbl0)
  1047. lbzx $acc01,$Tbl2,$acc01
  1048. lwz $acc12,`2048+128`($Tbl0)
  1049. rlwinm $acc06,$s1,`32-16`,24,31
  1050. lwz $acc13,`2048+160`($Tbl0)
  1051. rlwinm $acc07,$s2,`32-16`,24,31
  1052. lwz $acc14,`2048+192`($Tbl0)
  1053. rlwinm $acc08,$s2,`32-8`,24,31
  1054. lwz $acc15,`2048+224`($Tbl0)
  1055. rlwinm $acc09,$s3,`32-8`,24,31
  1056. lbzx $acc02,$Tbl2,$acc02
  1057. rlwinm $acc10,$s0,`32-8`,24,31
  1058. lbzx $acc03,$Tbl2,$acc03
  1059. rlwinm $acc11,$s1,`32-8`,24,31
  1060. lbzx $acc04,$Tbl2,$acc04
  1061. rlwinm $acc12,$s1,`0`,24,31
  1062. lbzx $acc05,$Tbl2,$acc05
  1063. rlwinm $acc13,$s2,`0`,24,31
  1064. lbzx $acc06,$Tbl2,$acc06
  1065. rlwinm $acc14,$s3,`0`,24,31
  1066. lbzx $acc07,$Tbl2,$acc07
  1067. rlwinm $acc15,$s0,`0`,24,31
  1068. lbzx $acc08,$Tbl2,$acc08
  # Rebuild the state words from the looked-up bytes: top byte first
  # (rlwinm), the two middle bytes via rlwimi, the low byte via or.
  1069. rlwinm $s0,$acc00,24,0,7
  1070. lbzx $acc09,$Tbl2,$acc09
  1071. rlwinm $s1,$acc01,24,0,7
  1072. lbzx $acc10,$Tbl2,$acc10
  1073. rlwinm $s2,$acc02,24,0,7
  1074. lbzx $acc11,$Tbl2,$acc11
  1075. rlwinm $s3,$acc03,24,0,7
  1076. lbzx $acc12,$Tbl2,$acc12
  1077. rlwimi $s0,$acc04,16,8,15
  1078. lbzx $acc13,$Tbl2,$acc13
  1079. rlwimi $s1,$acc05,16,8,15
  1080. lbzx $acc14,$Tbl2,$acc14
  1081. rlwimi $s2,$acc06,16,8,15
  1082. lbzx $acc15,$Tbl2,$acc15
  1083. rlwimi $s3,$acc07,16,8,15
  1084. rlwimi $s0,$acc08,8,16,23
  1085. rlwimi $s1,$acc09,8,16,23
  1086. rlwimi $s2,$acc10,8,16,23
  1087. rlwimi $s3,$acc11,8,16,23
  1088. or $s0,$s0,$acc12
  1089. or $s1,$s1,$acc13
  1090. or $s2,$s2,$acc14
  1091. or $s3,$s3,$acc15
  # Final round-key addition, result is left in s0-s3.
  1092. xor $s0,$s0,$t0
  1093. xor $s1,$s1,$t1
  1094. xor $s2,$s2,$t2
  1095. xor $s3,$s3,$t3
  1096. blr
  # NOTE(review): trailing data words look like a function traceback tag,
  # confirm the field meanings against the target ABI.
  1097. .long 0
  1098. .byte 0,12,0x14,0,0,0,0,0
  # ----------------------------------------------------------------------
  # Lppc_AES_decrypt_compact -- decryption core that uses only the byte
  # table located 2048 bytes past Tbl0 and computes the column mixing
  # arithmetically instead of with word tables.
  # In:  s0-s3 = state words, key = round-key pointer with the round
  #      count stored at byte offset 240, Tbl0 = table base.
  # Out: s0-s3 = result words.
  # Overwrites acc00-acc15, t0-t3, Tbl1, mask80, mask1b, key and the count
  # register. Uses no stack and returns with blr.
  # ----------------------------------------------------------------------
  1099. .align 4
  1100. Lppc_AES_decrypt_compact:
  # Setup: load the round count and the first round key, point Tbl1 at the
  # byte table, build the constants 0x80808080 and 0x1b1b1b1b.
  1101. lwz $acc00,240($key)
  1102. addi $Tbl1,$Tbl0,2048
  1103. lwz $t0,0($key)
  1104. lis $mask80,0x8080
  1105. lwz $t1,4($key)
  1106. lis $mask1b,0x1b1b
  1107. lwz $t2,8($key)
  1108. ori $mask80,$mask80,0x8080
  1109. lwz $t3,12($key)
  1110. ori $mask1b,$mask1b,0x1b1b
  1111. addi $key,$key,16
  1112. ___
  # 64-bit build only: copy each mask into its own upper 32 bits so the
  # paired 64-bit arithmetic further down covers two state words at once.
  1113. $code.=<<___ if ($SIZE_T==8);
  1114. insrdi $mask80,$mask80,32,0
  1115. insrdi $mask1b,$mask1b,32,0
  1116. ___
  1117. $code.=<<___;
  # The loop count is the full round count. The bdz in the middle of the
  # loop body leaves on the last pass, before the column mixing.
  1118. mtctr $acc00
  1119. .align 4
  1120. Ldec_compact_loop:
  # Add the current round key (t0-t3), then split the state into bytes.
  # Output word i takes its top byte from word i, the next byte from word
  # i-1, the next from word i-2 and the low byte from word i-3 (indices
  # modulo 4). Every byte is replaced by its table entry (lbzx).
  1121. xor $s0,$s0,$t0
  1122. xor $s1,$s1,$t1
  1123. rlwinm $acc00,$s0,`32-24`,24,31
  1124. xor $s2,$s2,$t2
  1125. rlwinm $acc01,$s1,`32-24`,24,31
  1126. xor $s3,$s3,$t3
  1127. rlwinm $acc02,$s2,`32-24`,24,31
  1128. rlwinm $acc03,$s3,`32-24`,24,31
  1129. rlwinm $acc04,$s3,`32-16`,24,31
  1130. rlwinm $acc05,$s0,`32-16`,24,31
  1131. rlwinm $acc06,$s1,`32-16`,24,31
  1132. rlwinm $acc07,$s2,`32-16`,24,31
  1133. lbzx $acc00,$Tbl1,$acc00
  1134. rlwinm $acc08,$s2,`32-8`,24,31
  1135. lbzx $acc01,$Tbl1,$acc01
  1136. rlwinm $acc09,$s3,`32-8`,24,31
  1137. lbzx $acc02,$Tbl1,$acc02
  1138. rlwinm $acc10,$s0,`32-8`,24,31
  1139. lbzx $acc03,$Tbl1,$acc03
  1140. rlwinm $acc11,$s1,`32-8`,24,31
  1141. lbzx $acc04,$Tbl1,$acc04
  1142. rlwinm $acc12,$s1,`0`,24,31
  1143. lbzx $acc05,$Tbl1,$acc05
  1144. rlwinm $acc13,$s2,`0`,24,31
  1145. lbzx $acc06,$Tbl1,$acc06
  1146. rlwinm $acc14,$s3,`0`,24,31
  1147. lbzx $acc07,$Tbl1,$acc07
  1148. rlwinm $acc15,$s0,`0`,24,31
  1149. lbzx $acc08,$Tbl1,$acc08
  # Rebuild the state words from the substituted bytes: top byte first
  # (rlwinm), the two middle bytes via rlwimi, the low byte via or.
  1150. rlwinm $s0,$acc00,24,0,7
  1151. lbzx $acc09,$Tbl1,$acc09
  1152. rlwinm $s1,$acc01,24,0,7
  1153. lbzx $acc10,$Tbl1,$acc10
  1154. rlwinm $s2,$acc02,24,0,7
  1155. lbzx $acc11,$Tbl1,$acc11
  1156. rlwinm $s3,$acc03,24,0,7
  1157. lbzx $acc12,$Tbl1,$acc12
  1158. rlwimi $s0,$acc04,16,8,15
  1159. lbzx $acc13,$Tbl1,$acc13
  1160. rlwimi $s1,$acc05,16,8,15
  1161. lbzx $acc14,$Tbl1,$acc14
  1162. rlwimi $s2,$acc06,16,8,15
  1163. lbzx $acc15,$Tbl1,$acc15
  1164. rlwimi $s3,$acc07,16,8,15
  1165. rlwimi $s0,$acc08,8,16,23
  1166. rlwimi $s1,$acc09,8,16,23
  1167. rlwimi $s2,$acc10,8,16,23
  1168. rlwimi $s3,$acc11,8,16,23
  # Fetch the next round key while the low bytes are merged in, advance
  # the key pointer and leave the loop if this was the last pass.
  1169. lwz $t0,0($key)
  1170. or $s0,$s0,$acc12
  1171. lwz $t1,4($key)
  1172. or $s1,$s1,$acc13
  1173. lwz $t2,8($key)
  1174. or $s2,$s2,$acc14
  1175. lwz $t3,12($key)
  1176. or $s3,$s3,$acc15
  1177. addi $key,$key,16
  1178. bdz Ldec_compact_done
  1179. ___
  # 64-bit build: s1 is packed into the upper half of s0 and s3 into the
  # upper half of s2, so each instruction below handles two state words.
  # r2, r4 and r8 in the comments are the results of applying the packed
  # byte-doubling step once, twice and three times to r0, where the step is
  # ((x & 0x7f7f7f7f) << 1) ^ (((x & 0x80808080) - ((x & 0x80808080) >> 7))
  # & 0x1b1b1b1b). The extrdi group at the end moves the upper halves into
  # the odd-numbered accumulators for the shared 32-bit tail.
  1180. $code.=<<___ if ($SIZE_T==8);
  1181. # vectorized permutation improves decrypt performance by 10%
  1182. insrdi $s0,$s1,32,0
  1183. insrdi $s2,$s3,32,0
  1184. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  1185. and $acc02,$s2,$mask80
  1186. srdi $acc04,$acc00,7 # r1>>7
  1187. srdi $acc06,$acc02,7
  1188. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  1189. andc $acc10,$s2,$mask80
  1190. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  1191. sub $acc02,$acc02,$acc06
  1192. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  1193. add $acc10,$acc10,$acc10
  1194. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1195. and $acc02,$acc02,$mask1b
  1196. xor $acc00,$acc00,$acc08 # r2
  1197. xor $acc02,$acc02,$acc10
  1198. and $acc04,$acc00,$mask80 # r1=r2&0x80808080
  1199. and $acc06,$acc02,$mask80
  1200. srdi $acc08,$acc04,7 # r1>>7
  1201. srdi $acc10,$acc06,7
  1202. andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
  1203. andc $acc14,$acc02,$mask80
  1204. sub $acc04,$acc04,$acc08 # r1-(r1>>7)
  1205. sub $acc06,$acc06,$acc10
  1206. add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
  1207. add $acc14,$acc14,$acc14
  1208. and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1209. and $acc06,$acc06,$mask1b
  1210. xor $acc04,$acc04,$acc12 # r4
  1211. xor $acc06,$acc06,$acc14
  1212. and $acc08,$acc04,$mask80 # r1=r4&0x80808080
  1213. and $acc10,$acc06,$mask80
  1214. srdi $acc12,$acc08,7 # r1>>7
  1215. srdi $acc14,$acc10,7
  1216. sub $acc08,$acc08,$acc12 # r1-(r1>>7)
  1217. sub $acc10,$acc10,$acc14
  1218. andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
  1219. andc $acc14,$acc06,$mask80
  1220. add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
  1221. add $acc14,$acc14,$acc14
  1222. and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1223. and $acc10,$acc10,$mask1b
  1224. xor $acc08,$acc08,$acc12 # r8
  1225. xor $acc10,$acc10,$acc14
  1226. xor $acc00,$acc00,$s0 # r2^r0
  1227. xor $acc02,$acc02,$s2
  1228. xor $acc04,$acc04,$s0 # r4^r0
  1229. xor $acc06,$acc06,$s2
  1230. extrdi $acc01,$acc00,32,0
  1231. extrdi $acc03,$acc02,32,0
  1232. extrdi $acc05,$acc04,32,0
  1233. extrdi $acc07,$acc06,32,0
  1234. extrdi $acc09,$acc08,32,0
  1235. extrdi $acc11,$acc10,32,0
  1236. ___
  # 32-bit build: the same r2, r4 and r8 chain, one state word per
  # register. On exit acc00-acc03 hold r2^r0, acc04-acc07 hold r4^r0 and
  # acc08-acc11 hold r8, which is what the shared tail expects.
  1237. $code.=<<___ if ($SIZE_T==4);
  1238. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  1239. and $acc01,$s1,$mask80
  1240. and $acc02,$s2,$mask80
  1241. and $acc03,$s3,$mask80
  1242. srwi $acc04,$acc00,7 # r1>>7
  1243. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  1244. srwi $acc05,$acc01,7
  1245. andc $acc09,$s1,$mask80
  1246. srwi $acc06,$acc02,7
  1247. andc $acc10,$s2,$mask80
  1248. srwi $acc07,$acc03,7
  1249. andc $acc11,$s3,$mask80
  1250. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  1251. sub $acc01,$acc01,$acc05
  1252. sub $acc02,$acc02,$acc06
  1253. sub $acc03,$acc03,$acc07
  1254. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  1255. add $acc09,$acc09,$acc09
  1256. add $acc10,$acc10,$acc10
  1257. add $acc11,$acc11,$acc11
  1258. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1259. and $acc01,$acc01,$mask1b
  1260. and $acc02,$acc02,$mask1b
  1261. and $acc03,$acc03,$mask1b
  1262. xor $acc00,$acc00,$acc08 # r2
  1263. xor $acc01,$acc01,$acc09
  1264. xor $acc02,$acc02,$acc10
  1265. xor $acc03,$acc03,$acc11
  1266. and $acc04,$acc00,$mask80 # r1=r2&0x80808080
  1267. and $acc05,$acc01,$mask80
  1268. and $acc06,$acc02,$mask80
  1269. and $acc07,$acc03,$mask80
  1270. srwi $acc08,$acc04,7 # r1>>7
  1271. andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
  1272. srwi $acc09,$acc05,7
  1273. andc $acc13,$acc01,$mask80
  1274. srwi $acc10,$acc06,7
  1275. andc $acc14,$acc02,$mask80
  1276. srwi $acc11,$acc07,7
  1277. andc $acc15,$acc03,$mask80
  1278. sub $acc04,$acc04,$acc08 # r1-(r1>>7)
  1279. sub $acc05,$acc05,$acc09
  1280. sub $acc06,$acc06,$acc10
  1281. sub $acc07,$acc07,$acc11
  1282. add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
  1283. add $acc13,$acc13,$acc13
  1284. add $acc14,$acc14,$acc14
  1285. add $acc15,$acc15,$acc15
  1286. and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1287. and $acc05,$acc05,$mask1b
  1288. and $acc06,$acc06,$mask1b
  1289. and $acc07,$acc07,$mask1b
  1290. xor $acc04,$acc04,$acc12 # r4
  1291. xor $acc05,$acc05,$acc13
  1292. xor $acc06,$acc06,$acc14
  1293. xor $acc07,$acc07,$acc15
  1294. and $acc08,$acc04,$mask80 # r1=r4&0x80808080
  1295. and $acc09,$acc05,$mask80
  1296. srwi $acc12,$acc08,7 # r1>>7
  1297. and $acc10,$acc06,$mask80
  1298. srwi $acc13,$acc09,7
  1299. and $acc11,$acc07,$mask80
  1300. srwi $acc14,$acc10,7
  1301. sub $acc08,$acc08,$acc12 # r1-(r1>>7)
  1302. srwi $acc15,$acc11,7
  1303. sub $acc09,$acc09,$acc13
  1304. sub $acc10,$acc10,$acc14
  1305. sub $acc11,$acc11,$acc15
  1306. andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
  1307. andc $acc13,$acc05,$mask80
  1308. andc $acc14,$acc06,$mask80
  1309. andc $acc15,$acc07,$mask80
  1310. add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
  1311. add $acc13,$acc13,$acc13
  1312. add $acc14,$acc14,$acc14
  1313. add $acc15,$acc15,$acc15
  1314. and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1315. and $acc09,$acc09,$mask1b
  1316. and $acc10,$acc10,$mask1b
  1317. and $acc11,$acc11,$mask1b
  1318. xor $acc08,$acc08,$acc12 # r8
  1319. xor $acc09,$acc09,$acc13
  1320. xor $acc10,$acc10,$acc14
  1321. xor $acc11,$acc11,$acc15
  1322. xor $acc00,$acc00,$s0 # r2^r0
  1323. xor $acc01,$acc01,$s1
  1324. xor $acc02,$acc02,$s2
  1325. xor $acc03,$acc03,$s3
  1326. xor $acc04,$acc04,$s0 # r4^r0
  1327. xor $acc05,$acc05,$s1
  1328. xor $acc06,$acc06,$s2
  1329. xor $acc07,$acc07,$s3
  1330. ___
  1331. $code.=<<___;
  # Shared tail: combine r0, r2, r4 and r8 into the new state word using
  # 32-bit rotations, as spelled out by the inline comments:
  #   ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8 ^ ROTATE(r8^r2^r0,24)
  #     ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
  # NOTE(review): presumably this is the AES inverse column mixing,
  # confirm against the specification.
  1332. rotrwi $s0,$s0,8 # = ROTATE(r0,8)
  1333. rotrwi $s1,$s1,8
  1334. xor $s0,$s0,$acc00 # ^= r2^r0
  1335. rotrwi $s2,$s2,8
  1336. xor $s1,$s1,$acc01
  1337. rotrwi $s3,$s3,8
  1338. xor $s2,$s2,$acc02
  1339. xor $s3,$s3,$acc03
  1340. xor $acc00,$acc00,$acc08
  1341. xor $acc01,$acc01,$acc09
  1342. xor $acc02,$acc02,$acc10
  1343. xor $acc03,$acc03,$acc11
  1344. xor $s0,$s0,$acc04 # ^= r4^r0
  1345. rotrwi $acc00,$acc00,24
  1346. xor $s1,$s1,$acc05
  1347. rotrwi $acc01,$acc01,24
  1348. xor $s2,$s2,$acc06
  1349. rotrwi $acc02,$acc02,24
  1350. xor $s3,$s3,$acc07
  1351. rotrwi $acc03,$acc03,24
  1352. xor $acc04,$acc04,$acc08
  1353. xor $acc05,$acc05,$acc09
  1354. xor $acc06,$acc06,$acc10
  1355. xor $acc07,$acc07,$acc11
  1356. xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
  1357. rotrwi $acc04,$acc04,16
  1358. xor $s1,$s1,$acc09
  1359. rotrwi $acc05,$acc05,16
  1360. xor $s2,$s2,$acc10
  1361. rotrwi $acc06,$acc06,16
  1362. xor $s3,$s3,$acc11
  1363. rotrwi $acc07,$acc07,16
  1364. xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
  1365. rotrwi $acc08,$acc08,8
  1366. xor $s1,$s1,$acc01
  1367. rotrwi $acc09,$acc09,8
  1368. xor $s2,$s2,$acc02
  1369. rotrwi $acc10,$acc10,8
  1370. xor $s3,$s3,$acc03
  1371. rotrwi $acc11,$acc11,8
  1372. xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
  1373. xor $s1,$s1,$acc05
  1374. xor $s2,$s2,$acc06
  1375. xor $s3,$s3,$acc07
  1376. xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
  1377. xor $s1,$s1,$acc09
  1378. xor $s2,$s2,$acc10
  1379. xor $s3,$s3,$acc11
  1380. b Ldec_compact_loop
  1381. .align 4
  1382. Ldec_compact_done:
  # Last pass: only the final round key (already in t0-t3) is added.
  1383. xor $s0,$s0,$t0
  1384. xor $s1,$s1,$t1
  1385. xor $s2,$s2,$t2
  1386. xor $s3,$s3,$t3
  1387. blr
  # NOTE(review): trailing data words look like a function traceback tag,
  # confirm the field meanings against the target ABI.
  1388. .long 0
  1389. .byte 0,12,0x14,0,0,0,0,0
  # The size of the AES_decrypt symbol is measured up to this point, so it
  # also covers the local core routines placed after the entry point.
  1390. .size .AES_decrypt,.-.AES_decrypt
  # Identification string embedded in the generated object, followed by
  # final alignment padding.
  1391. .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
  1392. .align 7
  1393. ___
  # Post-process the accumulated assembly text: every span enclosed in
  # backticks is replaced by the result of evaluating it as a Perl
  # expression (this is how the arithmetic operands written in backticks
  # above become numbers). Then write the text to STDOUT and close it.
  1394. $code =~ s/\`([^\`]*)\`/eval $1/gem;
  1395. print $code;
  1396. close STDOUT;