aes-ppc.pl 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365
  1. #!/usr/bin/env perl
  2. # ====================================================================
  3. # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  4. # project. The module is, however, dual licensed under OpenSSL and
  5. # CRYPTOGAMS licenses depending on where you obtain it. For further
  6. # details see http://www.openssl.org/~appro/cryptogams/.
  7. # ====================================================================
  8. # Needs more work: key setup, CBC routine...
  9. #
  10. # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
  11. # 128-bit key, which is ~40% better than 64-bit code generated by gcc
  12. # 4.0. But these are not the ones currently used! Their "compact"
  13. # counterparts are, for security reason. ppc_AES_encrypt_compact runs
  14. # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
  15. # at 1/3 of ppc_AES_decrypt.
  16. # February 2010
  17. #
  18. # Rescheduling instructions to favour Power6 pipeline gave 10%
  19. # performance improvement on the platform in question (and marginal
  20. # improvement even on others). It should be noted that Power6 fails
  21. # to process byte in 18 cycles, only in 23, because it fails to issue
  22. # 4 load instructions in two cycles, only in 3. As a result non-compact
  23. # block subroutines are 25% slower than one would expect. Compact
  24. # functions scale better, because they have pure computational part,
  25. # which scales perfectly with clock frequency. To be specific
  26. # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
  27. # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
  28. $flavour = shift; # first command-line argument selects the target flavour
  29. if ($flavour =~ /64/) { # 64-bit flavour: 8-byte slots, doubleword load/store mnemonics
  30. $SIZE_T =8;
  31. $LRSAVE =2*$SIZE_T; # the entry points store LR at FRAME+LRSAVE from the new stack pointer
  32. $STU ="stdu";
  33. $POP ="ld";
  34. $PUSH ="std";
  35. } elsif ($flavour =~ /32/) { # 32-bit flavour: 4-byte slots, word load/store mnemonics
  36. $SIZE_T =4;
  37. $LRSAVE =$SIZE_T;
  38. $STU ="stwu";
  39. $POP ="lwz";
  40. $PUSH ="stw";
  41. } else { die "nonsense $flavour"; }
  42. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
  43. ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  44. ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
  45. die "can't locate ppc-xlate.pl";
  46. open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!"; # low-precedence 'or' tests open's result; with '||' the die was attached to the always-true command string
  47. $FRAME=32*$SIZE_T; # stack frame size allocated by the entry points
  48. sub _data_word() # appends every argument to $code as a pair of identical .long words
  49. {
  50. while(defined(my $word=shift)) { $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word); } # stops at the first undefined argument
  51. }
  52. $sp = 'r1'; # stack pointer
  53. $toc = 'r2'; # saved and restored by the entry points
  54. $inp = 'r3'; # pointer the 16-byte input block is read from
  55. $out = 'r4'; # pointer the 16-byte output block is written to
  56. $key = 'r5'; # pointer to the round keys (word at offset 240 is read as a counter)
  57. $Tbl0 = 'r3'; # table base; shares r3 with $inp, which is consumed before the table address is fetched
  58. $Tbl1 = 'r6';
  59. $Tbl2 = 'r7';
  60. $Tbl3 = 'r2';
  61. $s0 = 'r8'; # $s0..$s3: the four 32-bit words of the block being processed
  62. $s1 = 'r9';
  63. $s2 = 'r10';
  64. $s3 = 'r11';
  65. $t0 = 'r12'; # $t0..$t3: round-key words and temporaries
  66. $t1 = 'r13';
  67. $t2 = 'r14';
  68. $t3 = 'r15';
  69. $acc00 = 'r16'; # $acc00..$acc15: scratch registers r16-r31
  70. $acc01 = 'r17';
  71. $acc02 = 'r18';
  72. $acc03 = 'r19';
  73. $acc04 = 'r20';
  74. $acc05 = 'r21';
  75. $acc06 = 'r22';
  76. $acc07 = 'r23';
  77. $acc08 = 'r24';
  78. $acc09 = 'r25';
  79. $acc10 = 'r26';
  80. $acc11 = 'r27';
  81. $acc12 = 'r28';
  82. $acc13 = 'r29';
  83. $acc14 = 'r30';
  84. $acc15 = 'r31';
  85. # keep clear of the register that holds the TLS pointer
  86. if ($SIZE_T == 8) { die if ($t1 ne 'r13'); $t1 = 'r0'; } # 64-bit: r13 is avoided, $t1 becomes r0
  87. else { die if ($Tbl3 ne 'r2'); $Tbl3 = $t0; $t0 = 'r0'; } # 32-bit: r2 is avoided, $Tbl3 takes $t0's register and $t0 becomes r0
  88. $mask80 = $Tbl2; # the compact code keeps its 0x80808080 mask in the $Tbl2 register
  89. $mask1b = $Tbl3; # ... and its 0x1b1b1b1b mask in the $Tbl3 register
  90. $code.=<<___; # start of an assembly template appended to the generated code
  91. .machine "any"
  92. .text
  93. .align 7
  94. LAES_Te: # position-independent helper: leaves the address of the first encryption table entry in Tbl0
  95. mflr r0 # keep the return address
  96. bcl 20,31,\$+4 # branch-and-link to the next instruction, so LR = address of that instruction
  97. mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
  98. addi $Tbl0,$Tbl0,`128-8`
  99. mtlr r0 # put the return address back
  100. blr
  101. .long 0
  102. .byte 0,12,0x14,0,0,0,0,0
  103. .space `64-9*4` # the nine words above plus this padding make the stub 64 bytes long
  104. LAES_Td: # same helper for decryption: its offset additionally skips 2048+256 bytes of encryption tables
  105. mflr r0 # keep the return address
  106. bcl 20,31,\$+4 # LR = address of the next instruction
  107. mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
  108. addi $Tbl0,$Tbl0,`128-64-8+2048+256`
  109. mtlr r0 # put the return address back
  110. blr
  111. .long 0
  112. .byte 0,12,0x14,0,0,0,0,0
  113. .space `128-64-9*4` # pad so that the table data begins 128 bytes after LAES_Te
  114. ___
  115. &_data_word(
  116. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  117. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  118. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  119. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  120. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  121. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  122. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  123. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  124. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  125. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  126. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  127. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  128. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  129. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  130. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  131. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  132. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  133. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  134. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  135. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  136. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  137. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  138. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  139. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  140. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  141. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  142. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  143. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  144. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  145. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  146. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  147. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  148. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  149. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  150. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  151. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  152. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  153. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  154. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  155. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  156. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  157. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  158. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  159. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  160. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  161. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  162. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  163. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  164. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  165. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  166. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  167. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  168. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  169. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  170. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  171. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  172. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  173. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  174. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  175. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  176. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  177. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  178. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  179. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  180. $code.=<<___;
  181. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  182. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  183. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  184. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  185. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  186. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  187. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  188. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  189. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  190. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  191. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  192. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  193. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  194. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  195. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  196. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  197. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  198. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  199. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  200. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  201. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  202. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  203. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  204. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  205. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  206. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  207. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  208. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  209. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  210. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  211. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  212. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  213. ___
  214. &_data_word(
  215. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  216. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  217. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  218. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  219. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  220. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  221. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  222. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  223. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  224. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  225. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  226. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  227. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  228. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  229. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  230. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  231. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  232. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  233. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  234. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  235. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  236. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  237. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  238. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  239. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  240. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  241. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  242. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  243. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  244. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  245. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  246. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  247. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  248. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  249. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  250. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  251. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  252. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  253. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  254. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  255. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  256. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  257. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  258. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  259. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  260. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  261. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  262. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  263. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  264. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  265. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  266. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  267. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  268. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  269. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  270. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  271. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  272. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  273. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  274. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  275. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  276. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  277. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  278. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  279. $code.=<<___;
  280. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  281. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  282. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  283. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  284. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  285. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  286. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  287. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  288. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  289. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  290. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  291. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  292. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  293. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  294. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  295. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  296. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  297. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  298. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  299. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  300. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  301. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  302. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  303. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  304. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  305. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  306. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  307. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  308. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  309. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  310. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  311. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  312. .globl .AES_encrypt # exported entry: encrypts the 16 bytes at inp into out using the round keys at key
  313. .align 7
  314. .AES_encrypt:
  315. $STU $sp,-$FRAME($sp) # allocate FRAME bytes of stack
  316. mflr r0 # return address, stored once the registers are saved
  317. $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) # save r2 and r13-r31, all of which serve as working registers here
  318. $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
  319. $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
  320. $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
  321. $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
  322. $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
  323. $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
  324. $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
  325. $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
  326. $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
  327. $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
  328. $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
  329. $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
  330. $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
  331. $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
  332. $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
  333. $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
  334. $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
  335. $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
  336. $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
  337. $PUSH r0,`$FRAME+$LRSAVE`($sp) # return address goes above this frame
  338. andi. $t0,$inp,3 # low two address bits of inp ...
  339. andi. $t1,$out,3 # ... and of out
  340. or. $t0,$t0,$t1
  341. bne Lenc_unaligned # at least one pointer is not 4-byte aligned
  342. Lenc_unaligned_ok: # word-wise path
  343. lwz $s0,0($inp) # load the block as four 32-bit words
  344. lwz $s1,4($inp)
  345. lwz $s2,8($inp)
  346. lwz $s3,12($inp)
  347. bl LAES_Te # Tbl0 (same register as inp) = address of the encryption tables
  348. bl Lppc_AES_encrypt_compact # transforms s0-s3 in place
  349. stw $s0,0($out) # store the result as four words
  350. stw $s1,4($out)
  351. stw $s2,8($out)
  352. stw $s3,12($out)
  353. b Lenc_done
  354. Lenc_unaligned: # misaligned pointers: choose between word and byte access
  355. subfic $t0,$inp,4096 # 4096 - inp
  356. subfic $t1,$out,4096 # 4096 - out
  357. andi. $t0,$t0,4096-16 # zero when bits 4-11 are clear, i.e. the value modulo 4096 is below 16
  358. beq Lenc_xpage # NOTE(review): presumably inp is too close to a 4096-byte page end for word loads - confirm
  359. andi. $t1,$t1,4096-16 # same test for out
  360. bne Lenc_unaligned_ok # neither pointer is near the boundary: misaligned word access is used
  361. Lenc_xpage: # byte-wise path
  362. lbz $acc00,0($inp) # read the 16 input bytes one at a time
  363. lbz $acc01,1($inp)
  364. lbz $acc02,2($inp)
  365. lbz $s0,3($inp) # every fourth byte lands directly in the low 8 bits of a state word
  366. lbz $acc04,4($inp)
  367. lbz $acc05,5($inp)
  368. lbz $acc06,6($inp)
  369. lbz $s1,7($inp)
  370. lbz $acc08,8($inp)
  371. lbz $acc09,9($inp)
  372. lbz $acc10,10($inp)
  373. insrwi $s0,$acc00,8,0 # merge the other bytes in: first byte of each group into the top 8 bits
  374. lbz $s2,11($inp)
  375. insrwi $s1,$acc04,8,0
  376. lbz $acc12,12($inp)
  377. insrwi $s0,$acc01,8,8
  378. lbz $acc13,13($inp)
  379. insrwi $s1,$acc05,8,8
  380. lbz $acc14,14($inp)
  381. insrwi $s0,$acc02,8,16
  382. lbz $s3,15($inp)
  383. insrwi $s1,$acc06,8,16
  384. insrwi $s2,$acc08,8,0
  385. insrwi $s3,$acc12,8,0
  386. insrwi $s2,$acc09,8,8
  387. insrwi $s3,$acc13,8,8
  388. insrwi $s2,$acc10,8,16
  389. insrwi $s3,$acc14,8,16
  390. bl LAES_Te # Tbl0 = address of the encryption tables
  391. bl Lppc_AES_encrypt_compact # transforms s0-s3 in place
  392. extrwi $acc00,$s0,8,0 # split each result word into bytes and store them individually
  393. extrwi $acc01,$s0,8,8
  394. stb $acc00,0($out)
  395. extrwi $acc02,$s0,8,16
  396. stb $acc01,1($out)
  397. stb $acc02,2($out)
  398. extrwi $acc04,$s1,8,0
  399. stb $s0,3($out) # stb writes the low 8 bits, so the word itself supplies the fourth byte
  400. extrwi $acc05,$s1,8,8
  401. stb $acc04,4($out)
  402. extrwi $acc06,$s1,8,16
  403. stb $acc05,5($out)
  404. stb $acc06,6($out)
  405. extrwi $acc08,$s2,8,0
  406. stb $s1,7($out)
  407. extrwi $acc09,$s2,8,8
  408. stb $acc08,8($out)
  409. extrwi $acc10,$s2,8,16
  410. stb $acc09,9($out)
  411. stb $acc10,10($out)
  412. extrwi $acc12,$s3,8,0
  413. stb $s2,11($out)
  414. extrwi $acc13,$s3,8,8
  415. stb $acc12,12($out)
  416. extrwi $acc14,$s3,8,16
  417. stb $acc13,13($out)
  418. stb $acc14,14($out)
  419. stb $s3,15($out)
  420. Lenc_done: # common exit for both paths
  421. $POP r0,`$FRAME+$LRSAVE`($sp) # reload the return address
  422. $POP $toc,`$FRAME-$SIZE_T*20`($sp) # restore r2 and r13-r31
  423. $POP r13,`$FRAME-$SIZE_T*19`($sp)
  424. $POP r14,`$FRAME-$SIZE_T*18`($sp)
  425. $POP r15,`$FRAME-$SIZE_T*17`($sp)
  426. $POP r16,`$FRAME-$SIZE_T*16`($sp)
  427. $POP r17,`$FRAME-$SIZE_T*15`($sp)
  428. $POP r18,`$FRAME-$SIZE_T*14`($sp)
  429. $POP r19,`$FRAME-$SIZE_T*13`($sp)
  430. $POP r20,`$FRAME-$SIZE_T*12`($sp)
  431. $POP r21,`$FRAME-$SIZE_T*11`($sp)
  432. $POP r22,`$FRAME-$SIZE_T*10`($sp)
  433. $POP r23,`$FRAME-$SIZE_T*9`($sp)
  434. $POP r24,`$FRAME-$SIZE_T*8`($sp)
  435. $POP r25,`$FRAME-$SIZE_T*7`($sp)
  436. $POP r26,`$FRAME-$SIZE_T*6`($sp)
  437. $POP r27,`$FRAME-$SIZE_T*5`($sp)
  438. $POP r28,`$FRAME-$SIZE_T*4`($sp)
  439. $POP r29,`$FRAME-$SIZE_T*3`($sp)
  440. $POP r30,`$FRAME-$SIZE_T*2`($sp)
  441. $POP r31,`$FRAME-$SIZE_T*1`($sp)
  442. mtlr r0
  443. addi $sp,$sp,$FRAME # release the stack frame
  444. blr
  445. .long 0
  446. .byte 0,12,4,1,0x80,18,3,0 # NOTE(review): looks like a traceback-table record - confirm the field encoding
  447. .long 0
  448. .align 5
  449. Lppc_AES_encrypt: # word-table variant: transforms s0-s3 in place; expects Tbl0 = table base and key = round keys
  450. lwz $acc00,240($key) # word at key+240; less one it becomes the loop count (presumably the number of rounds)
  451. addi $Tbl1,$Tbl0,3 # Tbl1-Tbl3 address the same table at byte offsets 3, 2 and 1
  452. lwz $t0,0($key) # first round key
  453. addi $Tbl2,$Tbl0,2
  454. lwz $t1,4($key)
  455. addi $Tbl3,$Tbl0,1
  456. lwz $t2,8($key)
  457. addi $acc00,$acc00,-1 # the final round is handled separately after the loop
  458. lwz $t3,12($key)
  459. addi $key,$key,16 # advance to the next round key
  460. xor $s0,$s0,$t0 # mix the first round key into the state
  461. xor $s1,$s1,$t1
  462. xor $s2,$s2,$t2
  463. xor $s3,$s3,$t3
  464. mtctr $acc00 # CTR = number of loop iterations
  465. .align 4
  466. Lenc_loop:
  467. rlwinm $acc00,$s0,`32-24+3`,21,28 # top byte of each state word, times 8 (each table entry occupies 8 bytes)
  468. rlwinm $acc01,$s1,`32-24+3`,21,28
  469. rlwinm $acc02,$s2,`32-24+3`,21,28
  470. rlwinm $acc03,$s3,`32-24+3`,21,28
  471. lwz $t0,0($key) # round key for this iteration
  472. rlwinm $acc04,$s1,`32-16+3`,21,28 # second byte, taken one state word further on
  473. lwz $t1,4($key)
  474. rlwinm $acc05,$s2,`32-16+3`,21,28
  475. lwz $t2,8($key)
  476. rlwinm $acc06,$s3,`32-16+3`,21,28
  477. lwz $t3,12($key)
  478. rlwinm $acc07,$s0,`32-16+3`,21,28
  479. lwzx $acc00,$Tbl0,$acc00 # word lookups through Tbl0
  480. rlwinm $acc08,$s2,`32-8+3`,21,28 # third byte, two state words further on
  481. lwzx $acc01,$Tbl0,$acc01
  482. rlwinm $acc09,$s3,`32-8+3`,21,28
  483. lwzx $acc02,$Tbl0,$acc02
  484. rlwinm $acc10,$s0,`32-8+3`,21,28
  485. lwzx $acc03,$Tbl0,$acc03
  486. rlwinm $acc11,$s1,`32-8+3`,21,28
  487. lwzx $acc04,$Tbl1,$acc04 # word lookups through Tbl1
  488. rlwinm $acc12,$s3,`0+3`,21,28 # lowest byte, three state words further on
  489. lwzx $acc05,$Tbl1,$acc05
  490. rlwinm $acc13,$s0,`0+3`,21,28
  491. lwzx $acc06,$Tbl1,$acc06
  492. rlwinm $acc14,$s1,`0+3`,21,28
  493. lwzx $acc07,$Tbl1,$acc07
  494. rlwinm $acc15,$s2,`0+3`,21,28
  495. lwzx $acc08,$Tbl2,$acc08 # word lookups through Tbl2
  496. xor $t0,$t0,$acc00 # fold the looked-up words into the round key
  497. lwzx $acc09,$Tbl2,$acc09
  498. xor $t1,$t1,$acc01
  499. lwzx $acc10,$Tbl2,$acc10
  500. xor $t2,$t2,$acc02
  501. lwzx $acc11,$Tbl2,$acc11
  502. xor $t3,$t3,$acc03
  503. lwzx $acc12,$Tbl3,$acc12 # word lookups through Tbl3
  504. xor $t0,$t0,$acc04
  505. lwzx $acc13,$Tbl3,$acc13
  506. xor $t1,$t1,$acc05
  507. lwzx $acc14,$Tbl3,$acc14
  508. xor $t2,$t2,$acc06
  509. lwzx $acc15,$Tbl3,$acc15
  510. xor $t3,$t3,$acc07
  511. xor $t0,$t0,$acc08
  512. xor $t1,$t1,$acc09
  513. xor $t2,$t2,$acc10
  514. xor $t3,$t3,$acc11
  515. xor $s0,$t0,$acc12 # the last xor produces the new state words
  516. xor $s1,$t1,$acc13
  517. xor $s2,$t2,$acc14
  518. xor $s3,$t3,$acc15
  519. addi $key,$key,16 # advance to the next round key
  520. bdnz Lenc_loop
  521. addi $Tbl2,$Tbl0,2048 # final round: Tbl2 now addresses the byte table 2048 bytes past the table base
  522. nop
  523. lwz $t0,0($key) # last round key
  524. rlwinm $acc00,$s0,`32-24`,24,31 # unscaled byte indices this time
  525. lwz $t1,4($key)
  526. rlwinm $acc01,$s1,`32-24`,24,31
  527. lwz $t2,8($key)
  528. rlwinm $acc02,$s2,`32-24`,24,31
  529. lwz $t3,12($key)
  530. rlwinm $acc03,$s3,`32-24`,24,31
  531. lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
  532. rlwinm $acc04,$s1,`32-16`,24,31
  533. lwz $acc09,`2048+32`($Tbl0)
  534. rlwinm $acc05,$s2,`32-16`,24,31
  535. lwz $acc10,`2048+64`($Tbl0)
  536. rlwinm $acc06,$s3,`32-16`,24,31
  537. lwz $acc11,`2048+96`($Tbl0)
  538. rlwinm $acc07,$s0,`32-16`,24,31
  539. lwz $acc12,`2048+128`($Tbl0)
  540. rlwinm $acc08,$s2,`32-8`,24,31 # overwrites the value loaded above, which is never used
  541. lwz $acc13,`2048+160`($Tbl0)
  542. rlwinm $acc09,$s3,`32-8`,24,31
  543. lwz $acc14,`2048+192`($Tbl0)
  544. rlwinm $acc10,$s0,`32-8`,24,31
  545. lwz $acc15,`2048+224`($Tbl0)
  546. rlwinm $acc11,$s1,`32-8`,24,31
  547. lbzx $acc00,$Tbl2,$acc00 # byte lookups in the byte table
  548. rlwinm $acc12,$s3,`0`,24,31
  549. lbzx $acc01,$Tbl2,$acc01
  550. rlwinm $acc13,$s0,`0`,24,31
  551. lbzx $acc02,$Tbl2,$acc02
  552. rlwinm $acc14,$s1,`0`,24,31
  553. lbzx $acc03,$Tbl2,$acc03
  554. rlwinm $acc15,$s2,`0`,24,31
  555. lbzx $acc04,$Tbl2,$acc04
  556. rlwinm $s0,$acc00,24,0,7 # rebuild each state word: first looked-up byte into the top 8 bits
  557. lbzx $acc05,$Tbl2,$acc05
  558. rlwinm $s1,$acc01,24,0,7
  559. lbzx $acc06,$Tbl2,$acc06
  560. rlwinm $s2,$acc02,24,0,7
  561. lbzx $acc07,$Tbl2,$acc07
  562. rlwinm $s3,$acc03,24,0,7
  563. lbzx $acc08,$Tbl2,$acc08
  564. rlwimi $s0,$acc04,16,8,15 # second byte into bits 8-15
  565. lbzx $acc09,$Tbl2,$acc09
  566. rlwimi $s1,$acc05,16,8,15
  567. lbzx $acc10,$Tbl2,$acc10
  568. rlwimi $s2,$acc06,16,8,15
  569. lbzx $acc11,$Tbl2,$acc11
  570. rlwimi $s3,$acc07,16,8,15
  571. lbzx $acc12,$Tbl2,$acc12
  572. rlwimi $s0,$acc08,8,16,23 # third byte into bits 16-23
  573. lbzx $acc13,$Tbl2,$acc13
  574. rlwimi $s1,$acc09,8,16,23
  575. lbzx $acc14,$Tbl2,$acc14
  576. rlwimi $s2,$acc10,8,16,23
  577. lbzx $acc15,$Tbl2,$acc15
  578. rlwimi $s3,$acc11,8,16,23
  579. or $s0,$s0,$acc12 # fourth byte into the low 8 bits
  580. or $s1,$s1,$acc13
  581. or $s2,$s2,$acc14
  582. or $s3,$s3,$acc15
  583. xor $s0,$s0,$t0 # apply the last round key
  584. xor $s1,$s1,$t1
  585. xor $s2,$s2,$t2
  586. xor $s3,$s3,$t3
  587. blr
  588. .long 0
  589. .byte 0,12,0x14,0,0,0,0,0
  590. .align 4
  591. Lppc_AES_encrypt_compact: # byte-table variant: transforms s0-s3 in place; expects Tbl0 = table base and key = round keys
  592. lwz $acc00,240($key) # word at key+240 becomes the loop count (presumably the number of rounds)
  593. addi $Tbl1,$Tbl0,2048 # Tbl1 = byte table located 2048 bytes past the table base
  594. lwz $t0,0($key) # first round key
  595. lis $mask80,0x8080 # upper half of the 0x80808080 mask
  596. lwz $t1,4($key)
  597. lis $mask1b,0x1b1b # upper half of the 0x1b1b1b1b mask
  598. lwz $t2,8($key)
  599. ori $mask80,$mask80,0x8080 # mask80 = 0x80808080
  600. lwz $t3,12($key)
  601. ori $mask1b,$mask1b,0x1b1b # mask1b = 0x1b1b1b1b
  602. addi $key,$key,16 # advance to the next round key
  603. mtctr $acc00 # CTR = loop count
  604. .align 4
  605. Lenc_compact_loop:
  606. xor $s0,$s0,$t0 # apply the current round key
  607. xor $s1,$s1,$t1
  608. rlwinm $acc00,$s0,`32-24`,24,31 # top byte of each state word as a table index
  609. xor $s2,$s2,$t2
  610. rlwinm $acc01,$s1,`32-24`,24,31
  611. xor $s3,$s3,$t3
  612. rlwinm $acc02,$s2,`32-24`,24,31
  613. rlwinm $acc03,$s3,`32-24`,24,31
  614. rlwinm $acc04,$s1,`32-16`,24,31 # second byte, taken one state word further on
  615. rlwinm $acc05,$s2,`32-16`,24,31
  616. rlwinm $acc06,$s3,`32-16`,24,31
  617. rlwinm $acc07,$s0,`32-16`,24,31
  618. lbzx $acc00,$Tbl1,$acc00 # byte lookups in the byte table
  619. rlwinm $acc08,$s2,`32-8`,24,31 # third byte, two state words further on
  620. lbzx $acc01,$Tbl1,$acc01
  621. rlwinm $acc09,$s3,`32-8`,24,31
  622. lbzx $acc02,$Tbl1,$acc02
  623. rlwinm $acc10,$s0,`32-8`,24,31
  624. lbzx $acc03,$Tbl1,$acc03
  625. rlwinm $acc11,$s1,`32-8`,24,31
  626. lbzx $acc04,$Tbl1,$acc04
  627. rlwinm $acc12,$s3,`0`,24,31 # lowest byte, three state words further on
  628. lbzx $acc05,$Tbl1,$acc05
  629. rlwinm $acc13,$s0,`0`,24,31
  630. lbzx $acc06,$Tbl1,$acc06
  631. rlwinm $acc14,$s1,`0`,24,31
  632. lbzx $acc07,$Tbl1,$acc07
  633. rlwinm $acc15,$s2,`0`,24,31
  634. lbzx $acc08,$Tbl1,$acc08
  635. rlwinm $s0,$acc00,24,0,7 # rebuild each state word: first looked-up byte into the top 8 bits
  636. lbzx $acc09,$Tbl1,$acc09
  637. rlwinm $s1,$acc01,24,0,7
  638. lbzx $acc10,$Tbl1,$acc10
  639. rlwinm $s2,$acc02,24,0,7
  640. lbzx $acc11,$Tbl1,$acc11
  641. rlwinm $s3,$acc03,24,0,7
  642. lbzx $acc12,$Tbl1,$acc12
  643. rlwimi $s0,$acc04,16,8,15 # second byte into bits 8-15
  644. lbzx $acc13,$Tbl1,$acc13
  645. rlwimi $s1,$acc05,16,8,15
  646. lbzx $acc14,$Tbl1,$acc14
  647. rlwimi $s2,$acc06,16,8,15
  648. lbzx $acc15,$Tbl1,$acc15
  649. rlwimi $s3,$acc07,16,8,15
  650. rlwimi $s0,$acc08,8,16,23 # third byte into bits 16-23
  651. rlwimi $s1,$acc09,8,16,23
  652. rlwimi $s2,$acc10,8,16,23
  653. rlwimi $s3,$acc11,8,16,23
  654. lwz $t0,0($key) # fetch the next round key
  655. or $s0,$s0,$acc12 # fourth byte into the low 8 bits
  656. lwz $t1,4($key)
  657. or $s1,$s1,$acc13
  658. lwz $t2,8($key)
  659. or $s2,$s2,$acc14
  660. lwz $t3,12($key)
  661. or $s3,$s3,$acc15
  662. addi $key,$key,16
  663. bdz Lenc_compact_done # on the last iteration the mixing step below is skipped
  664. and $acc00,$s0,$mask80 # r1=r0&0x80808080 (r0 denotes a state word in these notes)
  665. and $acc01,$s1,$mask80
  666. and $acc02,$s2,$mask80
  667. and $acc03,$s3,$mask80
  668. srwi $acc04,$acc00,7 # r1>>7
  669. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  670. srwi $acc05,$acc01,7
  671. andc $acc09,$s1,$mask80
  672. srwi $acc06,$acc02,7
  673. andc $acc10,$s2,$mask80
  674. srwi $acc07,$acc03,7
  675. andc $acc11,$s3,$mask80
  676. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  677. sub $acc01,$acc01,$acc05
  678. sub $acc02,$acc02,$acc06
  679. sub $acc03,$acc03,$acc07
  680. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  681. add $acc09,$acc09,$acc09
  682. add $acc10,$acc10,$acc10
  683. add $acc11,$acc11,$acc11
  684. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  685. and $acc01,$acc01,$mask1b
  686. and $acc02,$acc02,$mask1b
  687. and $acc03,$acc03,$mask1b
  688. xor $acc00,$acc00,$acc08 # r2 (NOTE(review): presumably each byte of r0 doubled in GF(2^8) - confirm)
  689. xor $acc01,$acc01,$acc09
  690. rotlwi $acc12,$s0,16 # ROTATE(r0,16)
  691. xor $acc02,$acc02,$acc10
  692. rotlwi $acc13,$s1,16
  693. xor $acc03,$acc03,$acc11
  694. rotlwi $acc14,$s2,16
  695. xor $s0,$s0,$acc00 # r0^r2
  696. rotlwi $acc15,$s3,16
  697. xor $s1,$s1,$acc01
  698. rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
  699. xor $s2,$s2,$acc02
  700. rotrwi $s1,$s1,24
  701. xor $s3,$s3,$acc03
  702. rotrwi $s2,$s2,24
  703. xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
  704. rotrwi $s3,$s3,24
  705. xor $s1,$s1,$acc01
  706. xor $s2,$s2,$acc02
  707. xor $s3,$s3,$acc03
  708. rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
  709. xor $s0,$s0,$acc12 # ... ^ROTATE(r0,16)
  710. rotlwi $acc09,$acc13,8
  711. xor $s1,$s1,$acc13
  712. rotlwi $acc10,$acc14,8
  713. xor $s2,$s2,$acc14
  714. rotlwi $acc11,$acc15,8
  715. xor $s3,$s3,$acc15
  716. xor $s0,$s0,$acc08 # ... ^ROTATE(r0,24)
  717. xor $s1,$s1,$acc09
  718. xor $s2,$s2,$acc10
  719. xor $s3,$s3,$acc11
  720. b Lenc_compact_loop
  721. .align 4
  722. Lenc_compact_done:
  723. xor $s0,$s0,$t0 # apply the last round key
  724. xor $s1,$s1,$t1
  725. xor $s2,$s2,$t2
  726. xor $s3,$s3,$t3
  727. blr
  728. .long 0
  729. .byte 0,12,0x14,0,0,0,0,0
  # ----------------------------------------------------------------------
  # .AES_decrypt: exported entry point that decrypts one 16-byte block.
  # Reads the block through $inp and writes the result through $out.
  # Flow: allocate a $FRAME-byte stack frame, save LR, $toc and r13-r31,
  # load the block into $s0-$s3, call LAES_Td followed by
  # Lppc_AES_decrypt_compact, store $s0-$s3, restore everything, return.
  # LAES_Td is defined outside this chunk; presumably it supplies the
  # table base ($Tbl0) that the round code indexes - TODO confirm.
  # ----------------------------------------------------------------------
  730. .globl .AES_decrypt
  731. .align 7
  732. .AES_decrypt:
  # Prologue: push the frame, then save LR (via r0), $toc and r13-r31.
  733. $STU $sp,-$FRAME($sp)
  734. mflr r0
  735. $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
  736. $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
  737. $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
  738. $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
  739. $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
  740. $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
  741. $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
  742. $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
  743. $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
  744. $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
  745. $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
  746. $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
  747. $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
  748. $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
  749. $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
  750. $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
  751. $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
  752. $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
  753. $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
  754. $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
  755. $PUSH r0,`$FRAME+$LRSAVE`($sp)
  # Alignment test: if either pointer has one of its two low bits set,
  # take the Ldec_unaligned path to decide between word and byte access.
  756. andi. $t0,$inp,3
  757. andi. $t1,$out,3
  758. or. $t0,$t0,$t1
  759. bne Ldec_unaligned
  # Word path: four 32-bit loads, decrypt, four 32-bit stores.
  760. Ldec_unaligned_ok:
  761. lwz $s0,0($inp)
  762. lwz $s1,4($inp)
  763. lwz $s2,8($inp)
  764. lwz $s3,12($inp)
  765. bl LAES_Td
  766. bl Lppc_AES_decrypt_compact
  767. stw $s0,0($out)
  768. stw $s1,4($out)
  769. stw $s2,8($out)
  770. stw $s3,12($out)
  771. b Ldec_done
  # At least one pointer is not 4-byte aligned. (4096-ptr)&(4096-16) is
  # zero only when ptr modulo 4096 is 0 or lies in the last 15 bytes
  # before a 4096-byte boundary. If that holds for either pointer the
  # byte-wise path below is used; otherwise the word path is taken even
  # though the pointers are misaligned.
  # NOTE(review): the label name suggests this guards against a 16-byte
  # access that would straddle a page boundary - confirm.
  772. Ldec_unaligned:
  773. subfic $t0,$inp,4096
  774. subfic $t1,$out,4096
  775. andi. $t0,$t0,4096-16
  776. beq Ldec_xpage
  777. andi. $t1,$t1,4096-16
  778. bne Ldec_unaligned_ok
  # Byte path, input side: every byte is loaded on its own. Bytes 3, 7,
  # 11 and 15 land directly in the low byte of $s0-$s3; the other three
  # bytes of each word are inserted at bit offsets 0, 8 and 16, so byte 0
  # of each group becomes the most significant byte of its word.
  779. Ldec_xpage:
  780. lbz $acc00,0($inp)
  781. lbz $acc01,1($inp)
  782. lbz $acc02,2($inp)
  783. lbz $s0,3($inp)
  784. lbz $acc04,4($inp)
  785. lbz $acc05,5($inp)
  786. lbz $acc06,6($inp)
  787. lbz $s1,7($inp)
  788. lbz $acc08,8($inp)
  789. lbz $acc09,9($inp)
  790. lbz $acc10,10($inp)
  791. insrwi $s0,$acc00,8,0
  792. lbz $s2,11($inp)
  793. insrwi $s1,$acc04,8,0
  794. lbz $acc12,12($inp)
  795. insrwi $s0,$acc01,8,8
  796. lbz $acc13,13($inp)
  797. insrwi $s1,$acc05,8,8
  798. lbz $acc14,14($inp)
  799. insrwi $s0,$acc02,8,16
  800. lbz $s3,15($inp)
  801. insrwi $s1,$acc06,8,16
  802. insrwi $s2,$acc08,8,0
  803. insrwi $s3,$acc12,8,0
  804. insrwi $s2,$acc09,8,8
  805. insrwi $s3,$acc13,8,8
  806. insrwi $s2,$acc10,8,16
  807. insrwi $s3,$acc14,8,16
  808. bl LAES_Td
  809. bl Lppc_AES_decrypt_compact
  # Byte path, output side: the three upper bytes of each word are
  # extracted into scratch registers and stored one by one; the lowest
  # byte is stored straight from the state register (stb writes only the
  # low 8 bits).
  810. extrwi $acc00,$s0,8,0
  811. extrwi $acc01,$s0,8,8
  812. stb $acc00,0($out)
  813. extrwi $acc02,$s0,8,16
  814. stb $acc01,1($out)
  815. stb $acc02,2($out)
  816. extrwi $acc04,$s1,8,0
  817. stb $s0,3($out)
  818. extrwi $acc05,$s1,8,8
  819. stb $acc04,4($out)
  820. extrwi $acc06,$s1,8,16
  821. stb $acc05,5($out)
  822. stb $acc06,6($out)
  823. extrwi $acc08,$s2,8,0
  824. stb $s1,7($out)
  825. extrwi $acc09,$s2,8,8
  826. stb $acc08,8($out)
  827. extrwi $acc10,$s2,8,16
  828. stb $acc09,9($out)
  829. stb $acc10,10($out)
  830. extrwi $acc12,$s3,8,0
  831. stb $s2,11($out)
  832. extrwi $acc13,$s3,8,8
  833. stb $acc12,12($out)
  834. extrwi $acc14,$s3,8,16
  835. stb $acc13,13($out)
  836. stb $acc14,14($out)
  837. stb $s3,15($out)
  # Epilogue: reload the saved LR into r0, restore $toc and r13-r31 from
  # the same slots the prologue used, pop the frame and return.
  838. Ldec_done:
  839. $POP r0,`$FRAME+$LRSAVE`($sp)
  840. $POP $toc,`$FRAME-$SIZE_T*20`($sp)
  841. $POP r13,`$FRAME-$SIZE_T*19`($sp)
  842. $POP r14,`$FRAME-$SIZE_T*18`($sp)
  843. $POP r15,`$FRAME-$SIZE_T*17`($sp)
  844. $POP r16,`$FRAME-$SIZE_T*16`($sp)
  845. $POP r17,`$FRAME-$SIZE_T*15`($sp)
  846. $POP r18,`$FRAME-$SIZE_T*14`($sp)
  847. $POP r19,`$FRAME-$SIZE_T*13`($sp)
  848. $POP r20,`$FRAME-$SIZE_T*12`($sp)
  849. $POP r21,`$FRAME-$SIZE_T*11`($sp)
  850. $POP r22,`$FRAME-$SIZE_T*10`($sp)
  851. $POP r23,`$FRAME-$SIZE_T*9`($sp)
  852. $POP r24,`$FRAME-$SIZE_T*8`($sp)
  853. $POP r25,`$FRAME-$SIZE_T*7`($sp)
  854. $POP r26,`$FRAME-$SIZE_T*6`($sp)
  855. $POP r27,`$FRAME-$SIZE_T*5`($sp)
  856. $POP r28,`$FRAME-$SIZE_T*4`($sp)
  857. $POP r29,`$FRAME-$SIZE_T*3`($sp)
  858. $POP r30,`$FRAME-$SIZE_T*2`($sp)
  859. $POP r31,`$FRAME-$SIZE_T*1`($sp)
  860. mtlr r0
  861. addi $sp,$sp,$FRAME
  862. blr
  # NOTE(review): the data words after blr look like a traceback-table
  # record describing this function - confirm against the ABI document.
  863. .long 0
  864. .byte 0,12,4,1,0x80,18,3,0
  865. .long 0
  # ----------------------------------------------------------------------
  # Lppc_AES_decrypt: table-driven decryption of the block in $s0-$s3.
  # In:  $s0-$s3 = state words, $key = key schedule (a round count is read
  #      from offset 240), $Tbl0 = table base supplied by the caller.
  # Out: $s0-$s3 = result.
  # Overwrites $t0-$t3, $acc00-$acc15, $Tbl1-$Tbl3 and CTR; $key is
  # advanced by 16 bytes per round key consumed.
  # Neither call site visible in this chunk uses this routine; both paths
  # of .AES_decrypt call Lppc_AES_decrypt_compact instead.
  # ----------------------------------------------------------------------
  866. .align 5
  867. Lppc_AES_decrypt:
  # Setup. $Tbl1/$Tbl2/$Tbl3 point 3, 2 and 1 bytes into the table, so a
  # word load through them reads the same 8-byte-spaced entry at a
  # different byte offset.
  # NOTE(review): presumably each entry stores its word twice so these
  # offset loads yield byte-rotated copies - confirm against the table.
  868. lwz $acc00,240($key)
  869. addi $Tbl1,$Tbl0,3
  870. lwz $t0,0($key)
  871. addi $Tbl2,$Tbl0,2
  872. lwz $t1,4($key)
  873. addi $Tbl3,$Tbl0,1
  874. lwz $t2,8($key)
  875. addi $acc00,$acc00,-1 # loop runs (round count - 1) times
  876. lwz $t3,12($key)
  877. addi $key,$key,16
  # Initial round-key addition.
  878. xor $s0,$s0,$t0
  879. xor $s1,$s1,$t1
  880. xor $s2,$s2,$t2
  881. xor $s3,$s3,$t3
  882. mtctr $acc00
  883. .align 4
  # One full round per iteration. Each rlwinm isolates one state byte and
  # scales it by 8 (rotate count includes +3, mask keeps bits 21-28) to
  # form a table offset. Output word i combines the top byte of word i,
  # the second byte of word i-1, the third byte of word i-2 and the low
  # byte of word i-3 (indices modulo 4), each looked up through
  # $Tbl0..$Tbl3 respectively, xored with the round key in $t0-$t3.
  # Key loads and table loads are interleaved with the index extraction.
  884. Ldec_loop:
  885. rlwinm $acc00,$s0,`32-24+3`,21,28
  886. rlwinm $acc01,$s1,`32-24+3`,21,28
  887. rlwinm $acc02,$s2,`32-24+3`,21,28
  888. rlwinm $acc03,$s3,`32-24+3`,21,28
  889. lwz $t0,0($key)
  890. rlwinm $acc04,$s3,`32-16+3`,21,28
  891. lwz $t1,4($key)
  892. rlwinm $acc05,$s0,`32-16+3`,21,28
  893. lwz $t2,8($key)
  894. rlwinm $acc06,$s1,`32-16+3`,21,28
  895. lwz $t3,12($key)
  896. rlwinm $acc07,$s2,`32-16+3`,21,28
  897. lwzx $acc00,$Tbl0,$acc00
  898. rlwinm $acc08,$s2,`32-8+3`,21,28
  899. lwzx $acc01,$Tbl0,$acc01
  900. rlwinm $acc09,$s3,`32-8+3`,21,28
  901. lwzx $acc02,$Tbl0,$acc02
  902. rlwinm $acc10,$s0,`32-8+3`,21,28
  903. lwzx $acc03,$Tbl0,$acc03
  904. rlwinm $acc11,$s1,`32-8+3`,21,28
  905. lwzx $acc04,$Tbl1,$acc04
  906. rlwinm $acc12,$s1,`0+3`,21,28
  907. lwzx $acc05,$Tbl1,$acc05
  908. rlwinm $acc13,$s2,`0+3`,21,28
  909. lwzx $acc06,$Tbl1,$acc06
  910. rlwinm $acc14,$s3,`0+3`,21,28
  911. lwzx $acc07,$Tbl1,$acc07
  912. rlwinm $acc15,$s0,`0+3`,21,28
  913. lwzx $acc08,$Tbl2,$acc08
  914. xor $t0,$t0,$acc00
  915. lwzx $acc09,$Tbl2,$acc09
  916. xor $t1,$t1,$acc01
  917. lwzx $acc10,$Tbl2,$acc10
  918. xor $t2,$t2,$acc02
  919. lwzx $acc11,$Tbl2,$acc11
  920. xor $t3,$t3,$acc03
  921. lwzx $acc12,$Tbl3,$acc12
  922. xor $t0,$t0,$acc04
  923. lwzx $acc13,$Tbl3,$acc13
  924. xor $t1,$t1,$acc05
  925. lwzx $acc14,$Tbl3,$acc14
  926. xor $t2,$t2,$acc06
  927. lwzx $acc15,$Tbl3,$acc15
  928. xor $t3,$t3,$acc07
  929. xor $t0,$t0,$acc08
  930. xor $t1,$t1,$acc09
  931. xor $t2,$t2,$acc10
  932. xor $t3,$t3,$acc11
  933. xor $s0,$t0,$acc12
  934. xor $s1,$t1,$acc13
  935. xor $s2,$t2,$acc14
  936. xor $s3,$t3,$acc15
  937. addi $key,$key,16
  938. bdnz Ldec_loop
  # Last round: substitution bytes come from the byte table located 2048
  # bytes past $Tbl0 (addressed through $Tbl2 from here on). The eight
  # lwz at 32-byte strides touch that 256-byte region before it is
  # indexed; their results are overwritten by the rlwinm that follow.
  # Indices are plain bytes here (mask 24-31, no scaling).
  939. addi $Tbl2,$Tbl0,2048
  940. nop
  941. lwz $t0,0($key)
  942. rlwinm $acc00,$s0,`32-24`,24,31
  943. lwz $t1,4($key)
  944. rlwinm $acc01,$s1,`32-24`,24,31
  945. lwz $t2,8($key)
  946. rlwinm $acc02,$s2,`32-24`,24,31
  947. lwz $t3,12($key)
  948. rlwinm $acc03,$s3,`32-24`,24,31
  949. lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
  950. rlwinm $acc04,$s3,`32-16`,24,31
  951. lwz $acc09,`2048+32`($Tbl0)
  952. rlwinm $acc05,$s0,`32-16`,24,31
  953. lwz $acc10,`2048+64`($Tbl0)
  954. lbzx $acc00,$Tbl2,$acc00
  955. lwz $acc11,`2048+96`($Tbl0)
  956. lbzx $acc01,$Tbl2,$acc01
  957. lwz $acc12,`2048+128`($Tbl0)
  958. rlwinm $acc06,$s1,`32-16`,24,31
  959. lwz $acc13,`2048+160`($Tbl0)
  960. rlwinm $acc07,$s2,`32-16`,24,31
  961. lwz $acc14,`2048+192`($Tbl0)
  962. rlwinm $acc08,$s2,`32-8`,24,31
  963. lwz $acc15,`2048+224`($Tbl0)
  964. rlwinm $acc09,$s3,`32-8`,24,31
  965. lbzx $acc02,$Tbl2,$acc02
  966. rlwinm $acc10,$s0,`32-8`,24,31
  967. lbzx $acc03,$Tbl2,$acc03
  968. rlwinm $acc11,$s1,`32-8`,24,31
  969. lbzx $acc04,$Tbl2,$acc04
  970. rlwinm $acc12,$s1,`0`,24,31
  971. lbzx $acc05,$Tbl2,$acc05
  972. rlwinm $acc13,$s2,`0`,24,31
  973. lbzx $acc06,$Tbl2,$acc06
  974. rlwinm $acc14,$s3,`0`,24,31
  975. lbzx $acc07,$Tbl2,$acc07
  976. rlwinm $acc15,$s0,`0`,24,31
  # Reassemble the four substituted bytes of each word: the first lookup
  # becomes the top byte (rlwinm overwrites the whole register), the next
  # two are merged into bits 8-15 and 16-23 with rlwimi, and the last is
  # ored into the low byte.
  977. lbzx $acc08,$Tbl2,$acc08
  978. rlwinm $s0,$acc00,24,0,7
  979. lbzx $acc09,$Tbl2,$acc09
  980. rlwinm $s1,$acc01,24,0,7
  981. lbzx $acc10,$Tbl2,$acc10
  982. rlwinm $s2,$acc02,24,0,7
  983. lbzx $acc11,$Tbl2,$acc11
  984. rlwinm $s3,$acc03,24,0,7
  985. lbzx $acc12,$Tbl2,$acc12
  986. rlwimi $s0,$acc04,16,8,15
  987. lbzx $acc13,$Tbl2,$acc13
  988. rlwimi $s1,$acc05,16,8,15
  989. lbzx $acc14,$Tbl2,$acc14
  990. rlwimi $s2,$acc06,16,8,15
  991. lbzx $acc15,$Tbl2,$acc15
  992. rlwimi $s3,$acc07,16,8,15
  993. rlwimi $s0,$acc08,8,16,23
  994. rlwimi $s1,$acc09,8,16,23
  995. rlwimi $s2,$acc10,8,16,23
  996. rlwimi $s3,$acc11,8,16,23
  997. or $s0,$s0,$acc12
  998. or $s1,$s1,$acc13
  999. or $s2,$s2,$acc14
  1000. or $s3,$s3,$acc15
  # Final round-key addition, then return to the caller.
  1001. xor $s0,$s0,$t0
  1002. xor $s1,$s1,$t1
  1003. xor $s2,$s2,$t2
  1004. xor $s3,$s3,$t3
  1005. blr
  1006. .long 0
  1007. .byte 0,12,0x14,0,0,0,0,0
  # ----------------------------------------------------------------------
  # Lppc_AES_decrypt_compact: decrypt the block held in $s0-$s3 using only
  # the byte table located 2048 bytes past $Tbl0 for substitution; the
  # column mixing between rounds is computed with shifts, masks and
  # rotates instead of table lookups.
  # In:  $s0-$s3 = state words, $key = key schedule (round count is read
  #      from offset 240), $Tbl0 = table base supplied by the caller.
  # Out: $s0-$s3 = result.
  # Overwrites $t0-$t3, $acc00-$acc15, $Tbl1, $mask80, $mask1b and CTR;
  # $key is advanced by 16 bytes per round key consumed.
  # The routine is emitted from several heredocs: common code plus one of
  # two mixing variants selected by the value of SIZE_T (8 or 4).
  # ----------------------------------------------------------------------
  1008. .align 4
  1009. Lppc_AES_decrypt_compact:
  # Setup: fetch the round count and first round key, and build the
  # constants 0x80808080 and 0x1b1b1b1b with lis/ori pairs.
  1010. lwz $acc00,240($key)
  1011. addi $Tbl1,$Tbl0,2048
  1012. lwz $t0,0($key)
  1013. lis $mask80,0x8080
  1014. lwz $t1,4($key)
  1015. lis $mask1b,0x1b1b
  1016. lwz $t2,8($key)
  1017. ori $mask80,$mask80,0x8080
  1018. lwz $t3,12($key)
  1019. ori $mask1b,$mask1b,0x1b1b
  1020. addi $key,$key,16
  1021. ___
  1022. $code.=<<___ if ($SIZE_T==8);
  # 64-bit build only: copy the low 32 bits of each mask into its upper
  # half so the masks cover two packed state words at once.
  1023. insrdi $mask80,$mask80,32,0
  1024. insrdi $mask1b,$mask1b,32,0
  1025. ___
  1026. $code.=<<___;
  1027. mtctr $acc00
  1028. .align 4
  # Per-round body. First add the round key, then replace every state
  # byte through the byte table at $Tbl1. Output word i is assembled from
  # the top byte of word i, the second byte of word i-1, the third byte
  # of word i-2 and the low byte of word i-3 (indices modulo 4).
  1029. Ldec_compact_loop:
  1030. xor $s0,$s0,$t0
  1031. xor $s1,$s1,$t1
  1032. rlwinm $acc00,$s0,`32-24`,24,31
  1033. xor $s2,$s2,$t2
  1034. rlwinm $acc01,$s1,`32-24`,24,31
  1035. xor $s3,$s3,$t3
  1036. rlwinm $acc02,$s2,`32-24`,24,31
  1037. rlwinm $acc03,$s3,`32-24`,24,31
  1038. rlwinm $acc04,$s3,`32-16`,24,31
  1039. rlwinm $acc05,$s0,`32-16`,24,31
  1040. rlwinm $acc06,$s1,`32-16`,24,31
  1041. rlwinm $acc07,$s2,`32-16`,24,31
  1042. lbzx $acc00,$Tbl1,$acc00
  1043. rlwinm $acc08,$s2,`32-8`,24,31
  1044. lbzx $acc01,$Tbl1,$acc01
  1045. rlwinm $acc09,$s3,`32-8`,24,31
  1046. lbzx $acc02,$Tbl1,$acc02
  1047. rlwinm $acc10,$s0,`32-8`,24,31
  1048. lbzx $acc03,$Tbl1,$acc03
  1049. rlwinm $acc11,$s1,`32-8`,24,31
  1050. lbzx $acc04,$Tbl1,$acc04
  1051. rlwinm $acc12,$s1,`0`,24,31
  1052. lbzx $acc05,$Tbl1,$acc05
  1053. rlwinm $acc13,$s2,`0`,24,31
  1054. lbzx $acc06,$Tbl1,$acc06
  1055. rlwinm $acc14,$s3,`0`,24,31
  1056. lbzx $acc07,$Tbl1,$acc07
  1057. rlwinm $acc15,$s0,`0`,24,31
  1058. lbzx $acc08,$Tbl1,$acc08
  1059. rlwinm $s0,$acc00,24,0,7
  1060. lbzx $acc09,$Tbl1,$acc09
  1061. rlwinm $s1,$acc01,24,0,7
  1062. lbzx $acc10,$Tbl1,$acc10
  1063. rlwinm $s2,$acc02,24,0,7
  1064. lbzx $acc11,$Tbl1,$acc11
  1065. rlwinm $s3,$acc03,24,0,7
  1066. lbzx $acc12,$Tbl1,$acc12
  1067. rlwimi $s0,$acc04,16,8,15
  1068. lbzx $acc13,$Tbl1,$acc13
  1069. rlwimi $s1,$acc05,16,8,15
  1070. lbzx $acc14,$Tbl1,$acc14
  1071. rlwimi $s2,$acc06,16,8,15
  1072. lbzx $acc15,$Tbl1,$acc15
  1073. rlwimi $s3,$acc07,16,8,15
  1074. rlwimi $s0,$acc08,8,16,23
  1075. rlwimi $s1,$acc09,8,16,23
  1076. rlwimi $s2,$acc10,8,16,23
  1077. rlwimi $s3,$acc11,8,16,23
  # Fetch the next round key while the low bytes are merged in, then
  # count down: when CTR reaches zero this was the last round, so skip
  # the column mixing and go add the final key.
  1078. lwz $t0,0($key)
  1079. or $s0,$s0,$acc12
  1080. lwz $t1,4($key)
  1081. or $s1,$s1,$acc13
  1082. lwz $t2,8($key)
  1083. or $s2,$s2,$acc14
  1084. lwz $t3,12($key)
  1085. or $s3,$s3,$acc15
  1086. addi $key,$key,16
  1087. bdz Ldec_compact_done
  1088. ___
  1089. $code.=<<___ if ($SIZE_T==8);
  1090. # vectorized permutation improves decrypt performance by 10%
  # 64-bit variant: $s1 is packed into the upper half of $s0 and $s3 into
  # the upper half of $s2, so each step below handles two state words.
  # r2, r4 and r8 are successive per-byte doublings of r0: bytes whose top
  # bit was set get 0x1b xored in after the shift (0x80 - 0x01 = 0x7f,
  # and 0x7f & 0x1b = 0x1b).
  1091. insrdi $s0,$s1,32,0
  1092. insrdi $s2,$s3,32,0
  1093. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  1094. and $acc02,$s2,$mask80
  1095. srdi $acc04,$acc00,7 # r1>>7
  1096. srdi $acc06,$acc02,7
  1097. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  1098. andc $acc10,$s2,$mask80
  1099. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  1100. sub $acc02,$acc02,$acc06
  1101. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  1102. add $acc10,$acc10,$acc10
  1103. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1104. and $acc02,$acc02,$mask1b
  1105. xor $acc00,$acc00,$acc08 # r2
  1106. xor $acc02,$acc02,$acc10
  1107. and $acc04,$acc00,$mask80 # r1=r2&0x80808080
  1108. and $acc06,$acc02,$mask80
  1109. srdi $acc08,$acc04,7 # r1>>7
  1110. srdi $acc10,$acc06,7
  1111. andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
  1112. andc $acc14,$acc02,$mask80
  1113. sub $acc04,$acc04,$acc08 # r1-(r1>>7)
  1114. sub $acc06,$acc06,$acc10
  1115. add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
  1116. add $acc14,$acc14,$acc14
  1117. and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1118. and $acc06,$acc06,$mask1b
  1119. xor $acc04,$acc04,$acc12 # r4
  1120. xor $acc06,$acc06,$acc14
  1121. and $acc08,$acc04,$mask80 # r1=r4&0x80808080
  1122. and $acc10,$acc06,$mask80
  1123. srdi $acc12,$acc08,7 # r1>>7
  1124. srdi $acc14,$acc10,7
  1125. sub $acc08,$acc08,$acc12 # r1-(r1>>7)
  1126. sub $acc10,$acc10,$acc14
  1127. andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
  1128. andc $acc14,$acc06,$mask80
  1129. add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
  1130. add $acc14,$acc14,$acc14
  1131. and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1132. and $acc10,$acc10,$mask1b
  1133. xor $acc08,$acc08,$acc12 # r8
  1134. xor $acc10,$acc10,$acc14
  1135. xor $acc00,$acc00,$s0 # r2^r0
  1136. xor $acc02,$acc02,$s2
  1137. xor $acc04,$acc04,$s0 # r4^r0
  1138. xor $acc06,$acc06,$s2
  # Unpack: copy the upper 32 bits of every packed result into the
  # odd-numbered scratch register so the shared tail below can work on
  # four separate words.
  1139. extrdi $acc01,$acc00,32,0
  1140. extrdi $acc03,$acc02,32,0
  1141. extrdi $acc05,$acc04,32,0
  1142. extrdi $acc07,$acc06,32,0
  1143. extrdi $acc09,$acc08,32,0
  1144. extrdi $acc11,$acc10,32,0
  1145. ___
  1146. $code.=<<___ if ($SIZE_T==4);
  # 32-bit variant: same r2/r4/r8 doubling chain as the 64-bit code, but
  # one state word per register, so every step is issued four times.
  1147. and $acc00,$s0,$mask80 # r1=r0&0x80808080
  1148. and $acc01,$s1,$mask80
  1149. and $acc02,$s2,$mask80
  1150. and $acc03,$s3,$mask80
  1151. srwi $acc04,$acc00,7 # r1>>7
  1152. andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
  1153. srwi $acc05,$acc01,7
  1154. andc $acc09,$s1,$mask80
  1155. srwi $acc06,$acc02,7
  1156. andc $acc10,$s2,$mask80
  1157. srwi $acc07,$acc03,7
  1158. andc $acc11,$s3,$mask80
  1159. sub $acc00,$acc00,$acc04 # r1-(r1>>7)
  1160. sub $acc01,$acc01,$acc05
  1161. sub $acc02,$acc02,$acc06
  1162. sub $acc03,$acc03,$acc07
  1163. add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
  1164. add $acc09,$acc09,$acc09
  1165. add $acc10,$acc10,$acc10
  1166. add $acc11,$acc11,$acc11
  1167. and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1168. and $acc01,$acc01,$mask1b
  1169. and $acc02,$acc02,$mask1b
  1170. and $acc03,$acc03,$mask1b
  1171. xor $acc00,$acc00,$acc08 # r2
  1172. xor $acc01,$acc01,$acc09
  1173. xor $acc02,$acc02,$acc10
  1174. xor $acc03,$acc03,$acc11
  1175. and $acc04,$acc00,$mask80 # r1=r2&0x80808080
  1176. and $acc05,$acc01,$mask80
  1177. and $acc06,$acc02,$mask80
  1178. and $acc07,$acc03,$mask80
  1179. srwi $acc08,$acc04,7 # r1>>7
  1180. andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
  1181. srwi $acc09,$acc05,7
  1182. andc $acc13,$acc01,$mask80
  1183. srwi $acc10,$acc06,7
  1184. andc $acc14,$acc02,$mask80
  1185. srwi $acc11,$acc07,7
  1186. andc $acc15,$acc03,$mask80
  1187. sub $acc04,$acc04,$acc08 # r1-(r1>>7)
  1188. sub $acc05,$acc05,$acc09
  1189. sub $acc06,$acc06,$acc10
  1190. sub $acc07,$acc07,$acc11
  1191. add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
  1192. add $acc13,$acc13,$acc13
  1193. add $acc14,$acc14,$acc14
  1194. add $acc15,$acc15,$acc15
  1195. and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1196. and $acc05,$acc05,$mask1b
  1197. and $acc06,$acc06,$mask1b
  1198. and $acc07,$acc07,$mask1b
  1199. xor $acc04,$acc04,$acc12 # r4
  1200. xor $acc05,$acc05,$acc13
  1201. xor $acc06,$acc06,$acc14
  1202. xor $acc07,$acc07,$acc15
  1203. and $acc08,$acc04,$mask80 # r1=r4&0x80808080
  1204. and $acc09,$acc05,$mask80
  1205. srwi $acc12,$acc08,7 # r1>>7
  1206. and $acc10,$acc06,$mask80
  1207. srwi $acc13,$acc09,7
  1208. and $acc11,$acc07,$mask80
  1209. srwi $acc14,$acc10,7
  1210. sub $acc08,$acc08,$acc12 # r1-(r1>>7)
  1211. srwi $acc15,$acc11,7
  1212. sub $acc09,$acc09,$acc13
  1213. sub $acc10,$acc10,$acc14
  1214. sub $acc11,$acc11,$acc15
  1215. andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
  1216. andc $acc13,$acc05,$mask80
  1217. andc $acc14,$acc06,$mask80
  1218. andc $acc15,$acc07,$mask80
  1219. add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
  1220. add $acc13,$acc13,$acc13
  1221. add $acc14,$acc14,$acc14
  1222. add $acc15,$acc15,$acc15
  1223. and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
  1224. and $acc09,$acc09,$mask1b
  1225. and $acc10,$acc10,$mask1b
  1226. and $acc11,$acc11,$mask1b
  1227. xor $acc08,$acc08,$acc12 # r8
  1228. xor $acc09,$acc09,$acc13
  1229. xor $acc10,$acc10,$acc14
  1230. xor $acc11,$acc11,$acc15
  1231. xor $acc00,$acc00,$s0 # r2^r0
  1232. xor $acc01,$acc01,$s1
  1233. xor $acc02,$acc02,$s2
  1234. xor $acc03,$acc03,$s3
  1235. xor $acc04,$acc04,$s0 # r4^r0
  1236. xor $acc05,$acc05,$s1
  1237. xor $acc06,$acc06,$s2
  1238. xor $acc07,$acc07,$s3
  1239. ___
  1240. $code.=<<___;
  # Shared tail of the mixing step. On entry $acc00-$acc03 hold r2^r0,
  # $acc04-$acc07 hold r4^r0 and $acc08-$acc11 hold r8 for the four words.
  # The new state word accumulates a rotated copy of r0 plus the terms
  # named in the comments below; every rotate is a 32-bit rotate right.
  1241. rotrwi $s0,$s0,8 # = ROTATE(r0,8)
  1242. rotrwi $s1,$s1,8
  1243. xor $s0,$s0,$acc00 # ^= r2^r0
  1244. rotrwi $s2,$s2,8
  1245. xor $s1,$s1,$acc01
  1246. rotrwi $s3,$s3,8
  1247. xor $s2,$s2,$acc02
  1248. xor $s3,$s3,$acc03
  1249. xor $acc00,$acc00,$acc08
  1250. xor $acc01,$acc01,$acc09
  1251. xor $acc02,$acc02,$acc10
  1252. xor $acc03,$acc03,$acc11
  1253. xor $s0,$s0,$acc04 # ^= r4^r0
  1254. rotrwi $acc00,$acc00,24
  1255. xor $s1,$s1,$acc05
  1256. rotrwi $acc01,$acc01,24
  1257. xor $s2,$s2,$acc06
  1258. rotrwi $acc02,$acc02,24
  1259. xor $s3,$s3,$acc07
  1260. rotrwi $acc03,$acc03,24
  1261. xor $acc04,$acc04,$acc08
  1262. xor $acc05,$acc05,$acc09
  1263. xor $acc06,$acc06,$acc10
  1264. xor $acc07,$acc07,$acc11
  1265. xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
  1266. rotrwi $acc04,$acc04,16
  1267. xor $s1,$s1,$acc09
  1268. rotrwi $acc05,$acc05,16
  1269. xor $s2,$s2,$acc10
  1270. rotrwi $acc06,$acc06,16
  1271. xor $s3,$s3,$acc11
  1272. rotrwi $acc07,$acc07,16
  1273. xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
  1274. rotrwi $acc08,$acc08,8
  1275. xor $s1,$s1,$acc01
  1276. rotrwi $acc09,$acc09,8
  1277. xor $s2,$s2,$acc02
  1278. rotrwi $acc10,$acc10,8
  1279. xor $s3,$s3,$acc03
  1280. rotrwi $acc11,$acc11,8
  1281. xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
  1282. xor $s1,$s1,$acc05
  1283. xor $s2,$s2,$acc06
  1284. xor $s3,$s3,$acc07
  1285. xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
  1286. xor $s1,$s1,$acc09
  1287. xor $s2,$s2,$acc10
  1288. xor $s3,$s3,$acc11
  1289. b Ldec_compact_loop
  1290. .align 4
  # Reached from the bdz above after the last substitution: add the final
  # round key (already loaded into $t0-$t3) and return.
  1291. Ldec_compact_done:
  1292. xor $s0,$s0,$t0
  1293. xor $s1,$s1,$t1
  1294. xor $s2,$s2,$t2
  1295. xor $s3,$s3,$t3
  1296. blr
  1297. .long 0
  1298. .byte 0,12,0x14,0,0,0,0,0
  1299. .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
  1300. .align 7
  1301. ___
  # Post-process and emit the accumulated assembly text.
  # Every backtick-quoted span in $code is a Perl arithmetic expression
  # (frame offsets, rotate counts, table displacements); evaluate each one
  # so the output contains plain numbers only.
  $code =~ s/\`([^\`]*)\`/eval $1/gem;
  print $code;
  # STDOUT is normally redirected to the generated .s file. A failed flush
  # (disk full, closed pipe) would otherwise leave a truncated file behind
  # while the script still exits successfully, so treat it as fatal.
  close STDOUT or die "error closing STDOUT: $!";