aes-sparcv9.pl 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182
  1. #!/usr/bin/env perl
  2. #
  3. # ====================================================================
  4. # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  5. # project. Rights for redistribution and usage in source and binary
  6. # forms are granted according to the OpenSSL license.
  7. # ====================================================================
  8. #
  9. # Version 1.1
  10. #
  11. # The major reason for undertaking this effort was to mitigate the hazard
  12. # of cache-timing attacks. This is [currently and initially!] addressed in
  13. # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B in size each.
  14. # 2. References to them are scheduled for L2 cache latency, meaning
  15. # that the tables don't have to reside in L1 cache. Once again, this
  16. # is an initial draft and one should expect more countermeasures to
  17. # be implemented...
  18. #
  19. # Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
  20. # round.
  21. #
  22. # Even though performance was not the primary goal [on the contrary,
  23. # extra shifts "induced" by the compressed S-box and the longer loop epilogue
  24. # "induced" by scheduling for L2 have a negative effect on performance],
  25. # the code turned out to run in ~23 cycles per processed byte en-/
  26. # decrypted with a 128-bit key. This is a pretty good result for code
  27. # with the mentioned qualities and for an UltraSPARC core. Compared to Sun C
  28. # generated code my encrypt procedure runs just a few percent faster,
  29. # while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
  30. # an optimal decrypt procedure]. Compared to GNU C generated code both
  31. # procedures are more than 60% faster:-)
  # Build configuration and the symbolic-name -> SPARC register map that the
  # assembly templates below interpolate.
  32. $bits=32; # default to a 32-bit build unless a 64-bit flag is passed
  33. for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } # -m64 or -xarch=v9 selects the 64-bit build
  34. if ($bits==64) { $bias=2047; $frame=192; } # $bias is added to %sp in every stack access (2047 is the 64-bit SPARC V9 ABI stack bias); $frame is the size given to `save`
  35. else { $bias=0; $frame=112; }
  36. $locals=16; # extra stack bytes reserved on top of $frame by the internal routines; the slot at $frame+0 holds the saved %i7
  # $acc0..$acc15: in the code below each first holds a table byte offset and
  # then the table entry loaded from that offset.
  37. $acc0="%l0";
  38. $acc1="%o0";
  39. $acc2="%o1";
  40. $acc3="%o2";
  41. $acc4="%l1";
  42. $acc5="%o3";
  43. $acc6="%o4";
  44. $acc7="%o5";
  45. $acc8="%l2";
  46. $acc9="%o7";
  47. $acc10="%g1";
  48. $acc11="%g2";
  49. $acc12="%l3";
  50. $acc13="%g3";
  51. $acc14="%g4";
  52. $acc15="%g5";
  # $t0..$t3: preloaded with round-key words, then accumulate the round output.
  53. $t0="%l4";
  54. $t1="%l5";
  55. $t2="%l6";
  56. $t3="%l7";
  # $s0..$s3: the four 32-bit state words (input and result of the core routines).
  57. $s0="%i0";
  58. $s1="%i1";
  59. $s2="%i2";
  60. $s3="%i3";
  61. $tbl="%i4"; # base address of the lookup table
  62. $key="%i5"; # pointer into the key schedule
  63. $rounds="%i7"; # aliases with return address, which is off-loaded to stack
  # Append table data to $code: every argument is emitted as one ".long" line
  # carrying the same 32-bit value twice, i.e. 8 bytes per table entry.
  # Arguments are consumed left to right and processing stops at the first
  # undefined one.  The sub is deliberately declared without a prototype: it
  # takes a variable-length list, so the former empty "()" prototype only
  # worked because callers used the "&" sigil, which bypasses prototypes.
  sub _data_word
  { my $word;
    while(defined($word=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word; }
  }
  # Emitted for 64-bit builds only: tell the assembler that %g2 and %g3 are
  # used as scratch registers (they back $acc11 and $acc13).
  68. $code.=<<___ if ($bits==64);
  69. .register %g2,#scratch
  70. .register %g3,#scratch
  71. ___
  # Start of the text section and of the encryption table AES_Te, aligned to
  # 256 bytes.  The _data_word call that follows emits every table word twice
  # (8 bytes per entry, hence the "and ...,2040" index masks in the code); the
  # .byte table placed after it is addressed as AES_Te+2048 by the
  # encryption routine.
  72. $code.=<<___;
  73. .section ".text",#alloc,#execinstr
  74. .align 256
  75. AES_Te:
  76. ___
  77. &_data_word(
  78. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  79. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  80. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  81. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  82. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  83. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  84. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  85. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  86. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  87. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  88. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  89. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  90. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  91. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  92. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  93. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  94. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  95. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  96. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  97. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  98. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  99. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  100. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  101. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  102. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  103. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  104. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  105. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  106. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  107. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  108. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  109. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  110. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  111. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  112. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  113. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  114. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  115. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  116. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  117. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  118. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  119. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  120. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  121. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  122. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  123. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  124. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  125. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  126. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  127. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  128. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  129. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  130. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  131. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  132. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  133. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  134. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  135. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  136. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  137. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  138. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  139. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  140. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  141. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  142. $code.=<<___;
  143. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  144. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  145. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  146. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  147. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  148. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  149. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  150. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  151. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  152. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  153. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  154. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  155. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  156. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  157. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  158. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  159. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  160. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  161. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  162. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  163. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  164. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  165. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  166. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  167. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  168. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  169. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  170. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  171. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  172. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  173. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  174. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  175. .type AES_Te,#object
  176. .size AES_Te,(.-AES_Te)
  177. .align 64
  178. .skip 16
  ! _sparcv9_AES_encrypt: internal core that encrypts one 16-byte block.
  ! Inputs, as seen after the save below (the callers in this file load the
  ! matching %o registers before the call):
  !   %i0-%i3  the four 32-bit state words (s0-s3)
  !   %i4      tbl, the address of AES_Te
  !   %i5      key schedule pointer; the round count is read from offset 240
  ! Result: the four output words are left in %i0-%i3, which the caller sees
  ! as %o0-%o3 once restore has run.
  ! The rounds counter lives in %i7, so the return address is spilled to the
  ! stack slot just above the frame and reloaded before ret.
  ! With the 20 instructions (80 bytes) ahead of .Lenc_loop, the 16-byte skip
  ! after .align 64 places .Lenc_loop on a 32-byte boundary.
  179. _sparcv9_AES_encrypt:
  180. save %sp,-$frame-$locals,%sp
  181. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  182. ld [$key+240],$rounds ! round count stored in the key structure
  183. ld [$key+0],$t0
  184. ld [$key+4],$t1 !
  185. ld [$key+8],$t2
  186. srl $rounds,1,$rounds ! halve it: the counter is decremented once per two rounds
  187. xor $t0,$s0,$s0 ! xor the input words with key words 0-3
  188. ld [$key+12],$t3
  189. srl $s0,21,$acc0 ! together with the and 2040 below: (s0>>24)*8, the byte offset of an 8-byte table entry
  190. xor $t1,$s1,$s1
  191. ld [$key+16],$t0 ! t0-t3 are preloaded with the next round key
  192. srl $s1,13,$acc1 !
  193. xor $t2,$s2,$s2
  194. ld [$key+20],$t1
  195. xor $t3,$s3,$s3
  196. ld [$key+24],$t2
  197. and $acc0,2040,$acc0
  198. ld [$key+28],$t3
  199. nop
  ! First half of a pass: sixteen table lookups indexed by the bytes of s0-s3.
  200. .Lenc_loop:
  201. srl $s2,5,$acc2 !
  202. and $acc1,2040,$acc1
  203. ldx [$tbl+$acc0],$acc0
  204. sll $s3,3,$acc3 ! low byte of s3, scaled by 8 once masked
  205. and $acc2,2040,$acc2
  206. ldx [$tbl+$acc1],$acc1
  207. srl $s1,21,$acc4
  208. and $acc3,2040,$acc3
  209. ldx [$tbl+$acc2],$acc2 !
  210. srl $s2,13,$acc5
  211. and $acc4,2040,$acc4
  212. ldx [$tbl+$acc3],$acc3
  213. srl $s3,5,$acc6
  214. and $acc5,2040,$acc5
  215. ldx [$tbl+$acc4],$acc4
  216. fmovs %f0,%f0 ! architectural no-op (f0 copied onto itself); presumably scheduling filler, like the other fmovs below
  217. sll $s0,3,$acc7 !
  218. and $acc6,2040,$acc6
  219. ldx [$tbl+$acc5],$acc5
  220. srl $s2,21,$acc8
  221. and $acc7,2040,$acc7
  222. ldx [$tbl+$acc6],$acc6
  223. srl $s3,13,$acc9
  224. and $acc8,2040,$acc8
  225. ldx [$tbl+$acc7],$acc7 !
  226. srl $s0,5,$acc10
  227. and $acc9,2040,$acc9
  228. ldx [$tbl+$acc8],$acc8
  229. sll $s1,3,$acc11
  230. and $acc10,2040,$acc10
  231. ldx [$tbl+$acc9],$acc9
  232. fmovs %f0,%f0
  233. srl $s3,21,$acc12 !
  234. and $acc11,2040,$acc11
  235. ldx [$tbl+$acc10],$acc10
  236. srl $s0,13,$acc13
  237. and $acc12,2040,$acc12
  238. ldx [$tbl+$acc11],$acc11
  239. srl $s1,5,$acc14
  240. and $acc13,2040,$acc13
  241. ldx [$tbl+$acc12],$acc12 !
  242. sll $s2,3,$acc15
  243. and $acc14,2040,$acc14
  244. ldx [$tbl+$acc13],$acc13
  245. and $acc15,2040,$acc15
  246. add $key,32,$key ! step the key pointer by two round keys (32 bytes)
  247. ldx [$tbl+$acc14],$acc14
  248. fmovs %f0,%f0
  249. subcc $rounds,1,$rounds !
  250. ldx [$tbl+$acc15],$acc15
  251. bz,a,pn %icc,.Lenc_last ! counter reached zero: finish on the last-round path
  252. add $tbl,2048,$rounds ! annulled delay slot, executed only when the branch is taken: rounds = tbl+2048, the byte table
  253. srlx $acc1,8,$acc1 ! each entry holds its 32-bit word twice, so the low word of entry>>8 is that word rotated right by 8
  254. xor $acc0,$t0,$t0 ! fold the lookups into t0, which already holds the round-key word
  255. ld [$key+0],$s0 ! s0-s3 are free now; reload them with the following round key
  256. fmovs %f0,%f0
  257. srlx $acc2,16,$acc2 !
  258. xor $acc1,$t0,$t0
  259. ld [$key+4],$s1
  260. srlx $acc3,24,$acc3
  261. xor $acc2,$t0,$t0
  262. ld [$key+8],$s2
  263. srlx $acc5,8,$acc5
  264. xor $acc3,$t0,$t0
  265. ld [$key+12],$s3 !
  266. srlx $acc6,16,$acc6
  267. xor $acc4,$t1,$t1
  268. fmovs %f0,%f0
  269. srlx $acc7,24,$acc7
  270. xor $acc5,$t1,$t1
  271. srlx $acc9,8,$acc9
  272. xor $acc6,$t1,$t1
  273. srlx $acc10,16,$acc10 !
  274. xor $acc7,$t1,$t1
  275. srlx $acc11,24,$acc11
  276. xor $acc8,$t2,$t2
  277. srlx $acc13,8,$acc13
  278. xor $acc9,$t2,$t2
  279. srlx $acc14,16,$acc14
  280. xor $acc10,$t2,$t2
  281. srlx $acc15,24,$acc15 !
  282. xor $acc11,$t2,$t2
  283. xor $acc12,$acc14,$acc14
  284. xor $acc13,$t3,$t3
  285. srl $t0,21,$acc0 ! second half of the pass: the same lookup pattern, now indexed by t0-t3
  286. xor $acc14,$t3,$t3
  287. srl $t1,13,$acc1
  288. xor $acc15,$t3,$t3
  289. and $acc0,2040,$acc0 !
  290. srl $t2,5,$acc2
  291. and $acc1,2040,$acc1
  292. ldx [$tbl+$acc0],$acc0
  293. sll $t3,3,$acc3
  294. and $acc2,2040,$acc2
  295. ldx [$tbl+$acc1],$acc1
  296. fmovs %f0,%f0
  297. srl $t1,21,$acc4 !
  298. and $acc3,2040,$acc3
  299. ldx [$tbl+$acc2],$acc2
  300. srl $t2,13,$acc5
  301. and $acc4,2040,$acc4
  302. ldx [$tbl+$acc3],$acc3
  303. srl $t3,5,$acc6
  304. and $acc5,2040,$acc5
  305. ldx [$tbl+$acc4],$acc4 !
  306. sll $t0,3,$acc7
  307. and $acc6,2040,$acc6
  308. ldx [$tbl+$acc5],$acc5
  309. srl $t2,21,$acc8
  310. and $acc7,2040,$acc7
  311. ldx [$tbl+$acc6],$acc6
  312. fmovs %f0,%f0
  313. srl $t3,13,$acc9 !
  314. and $acc8,2040,$acc8
  315. ldx [$tbl+$acc7],$acc7
  316. srl $t0,5,$acc10
  317. and $acc9,2040,$acc9
  318. ldx [$tbl+$acc8],$acc8
  319. sll $t1,3,$acc11
  320. and $acc10,2040,$acc10
  321. ldx [$tbl+$acc9],$acc9 !
  322. srl $t3,21,$acc12
  323. and $acc11,2040,$acc11
  324. ldx [$tbl+$acc10],$acc10
  325. srl $t0,13,$acc13
  326. and $acc12,2040,$acc12
  327. ldx [$tbl+$acc11],$acc11
  328. fmovs %f0,%f0
  329. srl $t1,5,$acc14 !
  330. and $acc13,2040,$acc13
  331. ldx [$tbl+$acc12],$acc12
  332. sll $t2,3,$acc15
  333. and $acc14,2040,$acc14
  334. ldx [$tbl+$acc13],$acc13
  335. srlx $acc1,8,$acc1
  336. and $acc15,2040,$acc15
  337. ldx [$tbl+$acc14],$acc14 !
  338. srlx $acc2,16,$acc2
  339. xor $acc0,$s0,$s0 ! accumulate into s0-s3, which hold the round key loaded above
  340. ldx [$tbl+$acc15],$acc15
  341. srlx $acc3,24,$acc3
  342. xor $acc1,$s0,$s0
  343. ld [$key+16],$t0 ! refill t0-t3 with the round key for the next pass
  344. fmovs %f0,%f0
  345. srlx $acc5,8,$acc5 !
  346. xor $acc2,$s0,$s0
  347. ld [$key+20],$t1
  348. srlx $acc6,16,$acc6
  349. xor $acc3,$s0,$s0
  350. ld [$key+24],$t2
  351. srlx $acc7,24,$acc7
  352. xor $acc4,$s1,$s1
  353. ld [$key+28],$t3 !
  354. srlx $acc9,8,$acc9
  355. xor $acc5,$s1,$s1
  356. ldx [$tbl+2048+0],%g0 ! prefetch te4
  357. srlx $acc10,16,$acc10
  358. xor $acc6,$s1,$s1
  359. ldx [$tbl+2048+32],%g0 ! prefetch te4
  360. srlx $acc11,24,$acc11
  361. xor $acc7,$s1,$s1
  362. ldx [$tbl+2048+64],%g0 ! prefetch te4
  363. srlx $acc13,8,$acc13
  364. xor $acc8,$s2,$s2
  365. ldx [$tbl+2048+96],%g0 ! prefetch te4
  366. srlx $acc14,16,$acc14 !
  367. xor $acc9,$s2,$s2
  368. ldx [$tbl+2048+128],%g0 ! prefetch te4
  369. srlx $acc15,24,$acc15
  370. xor $acc10,$s2,$s2
  371. ldx [$tbl+2048+160],%g0 ! prefetch te4
  372. srl $s0,21,$acc0 ! start computing the offsets for the next pass
  373. xor $acc11,$s2,$s2
  374. ldx [$tbl+2048+192],%g0 ! prefetch te4
  375. xor $acc12,$acc14,$acc14
  376. xor $acc13,$s3,$s3
  377. ldx [$tbl+2048+224],%g0 ! prefetch te4
  378. srl $s1,13,$acc1 !
  379. xor $acc14,$s3,$s3
  380. xor $acc15,$s3,$s3
  381. ba .Lenc_loop
  382. and $acc0,2040,$acc0 ! delay slot: finish the first offset for the next pass
  383. .align 32
  ! Last-round path: complete the pending round into t0-t3, then substitute
  ! each byte through the 256-byte table addressed by rounds (tbl+2048) and
  ! xor the reassembled words into the final round key held in s0-s3.
  384. .Lenc_last:
  385. srlx $acc1,8,$acc1 !
  386. xor $acc0,$t0,$t0
  387. ld [$key+0],$s0 ! final round key goes into s0-s3
  388. srlx $acc2,16,$acc2
  389. xor $acc1,$t0,$t0
  390. ld [$key+4],$s1
  391. srlx $acc3,24,$acc3
  392. xor $acc2,$t0,$t0
  393. ld [$key+8],$s2 !
  394. srlx $acc5,8,$acc5
  395. xor $acc3,$t0,$t0
  396. ld [$key+12],$s3
  397. srlx $acc6,16,$acc6
  398. xor $acc4,$t1,$t1
  399. srlx $acc7,24,$acc7
  400. xor $acc5,$t1,$t1
  401. srlx $acc9,8,$acc9 !
  402. xor $acc6,$t1,$t1
  403. srlx $acc10,16,$acc10
  404. xor $acc7,$t1,$t1
  405. srlx $acc11,24,$acc11
  406. xor $acc8,$t2,$t2
  407. srlx $acc13,8,$acc13
  408. xor $acc9,$t2,$t2
  409. srlx $acc14,16,$acc14 !
  410. xor $acc10,$t2,$t2
  411. srlx $acc15,24,$acc15
  412. xor $acc11,$t2,$t2
  413. xor $acc12,$acc14,$acc14
  414. xor $acc13,$t3,$t3
  415. srl $t0,24,$acc0 ! byte indices now, not scaled offsets: the top byte needs no mask after a 32-bit shift by 24
  416. xor $acc14,$t3,$t3
  417. srl $t1,16,$acc1 !
  418. xor $acc15,$t3,$t3
  419. srl $t2,8,$acc2
  420. and $acc1,255,$acc1
  421. ldub [$rounds+$acc0],$acc0 ! rounds is the byte-table base set in the branch delay slot
  422. srl $t1,24,$acc4
  423. and $acc2,255,$acc2
  424. ldub [$rounds+$acc1],$acc1
  425. srl $t2,16,$acc5 !
  426. and $t3,255,$acc3
  427. ldub [$rounds+$acc2],$acc2
  428. ldub [$rounds+$acc3],$acc3
  429. srl $t3,8,$acc6
  430. and $acc5,255,$acc5
  431. ldub [$rounds+$acc4],$acc4
  432. fmovs %f0,%f0
  433. srl $t2,24,$acc8 !
  434. and $acc6,255,$acc6
  435. ldub [$rounds+$acc5],$acc5
  436. srl $t3,16,$acc9
  437. and $t0,255,$acc7
  438. ldub [$rounds+$acc6],$acc6
  439. ldub [$rounds+$acc7],$acc7
  440. fmovs %f0,%f0
  441. srl $t0,8,$acc10 !
  442. and $acc9,255,$acc9
  443. ldub [$rounds+$acc8],$acc8
  444. srl $t3,24,$acc12
  445. and $acc10,255,$acc10
  446. ldub [$rounds+$acc9],$acc9
  447. srl $t0,16,$acc13
  448. and $t1,255,$acc11
  449. ldub [$rounds+$acc10],$acc10 !
  450. srl $t1,8,$acc14
  451. and $acc13,255,$acc13
  452. ldub [$rounds+$acc11],$acc11
  453. ldub [$rounds+$acc12],$acc12
  454. and $acc14,255,$acc14
  455. ldub [$rounds+$acc13],$acc13
  456. and $t2,255,$acc15
  457. ldub [$rounds+$acc14],$acc14 !
  458. sll $acc0,24,$acc0 ! shift each substituted byte back to its position in the word
  459. xor $acc3,$s0,$s0 ! the lowest byte needs no shift; accumulate into the key word
  460. ldub [$rounds+$acc15],$acc15
  461. sll $acc1,16,$acc1
  462. xor $acc0,$s0,$s0
  463. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  464. fmovs %f0,%f0
  465. sll $acc2,8,$acc2 !
  466. xor $acc1,$s0,$s0
  467. sll $acc4,24,$acc4
  468. xor $acc2,$s0,$s0
  469. sll $acc5,16,$acc5
  470. xor $acc7,$s1,$s1
  471. sll $acc6,8,$acc6
  472. xor $acc4,$s1,$s1
  473. sll $acc8,24,$acc8 !
  474. xor $acc5,$s1,$s1
  475. sll $acc9,16,$acc9
  476. xor $acc11,$s2,$s2
  477. sll $acc10,8,$acc10
  478. xor $acc6,$s1,$s1
  479. sll $acc12,24,$acc12
  480. xor $acc8,$s2,$s2
  481. sll $acc13,16,$acc13 !
  482. xor $acc9,$s2,$s2
  483. sll $acc14,8,$acc14
  484. xor $acc10,$s2,$s2
  485. xor $acc12,$acc14,$acc14
  486. xor $acc13,$s3,$s3
  487. xor $acc14,$s3,$s3
  488. xor $acc15,$s3,$s3
  489. ret
  490. restore ! delay slot: pop the register window, so %i0-%i3 become the caller's %o0-%o3
  491. .type _sparcv9_AES_encrypt,#function
  492. .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
  493. .align 32
  ! AES_encrypt: exported entry point that encrypts one 16-byte block.
  !   %o0  input pointer  (16 bytes are read)
  !   %o1  output pointer (16 bytes are written)
  !   %o2  key schedule pointer, handed to the core routine in %o5
  ! Presumably this matches a C prototype of the form AES_encrypt(in, out, key);
  ! confirm against the declaring header.
  ! When both pointers are 4-byte aligned the block is moved with word loads
  ! and stores; otherwise .Lunaligned_enc assembles and scatters it one byte
  ! at a time, most significant byte first.
  494. .globl AES_encrypt
  495. AES_encrypt:
  496. or %o0,%o1,%g1 ! merge both pointers so a single test covers them
  497. andcc %g1,3,%g0 ! non-zero when either pointer is not word aligned
  498. bnz,pn %xcc,.Lunaligned_enc
  499. save %sp,-$frame,%sp ! delay slot (not annulled): the new window is opened on both paths
  500. ld [%i0+0],%o0
  501. ld [%i0+4],%o1
  502. ld [%i0+8],%o2
  503. ld [%i0+12],%o3
  504. 1: call .+8 ! position-independent address idiom: the call leaves its own address (label 1) in %o7
  505. add %o7,AES_Te-1b,%o4 ! delay slot: %o4 = run-time address of AES_Te
  506. call _sparcv9_AES_encrypt
  507. mov %i2,%o5 ! delay slot: pass the key schedule pointer
  508. st %o0,[%i1+0] ! the core leaves the result words in %o0-%o3
  509. st %o1,[%i1+4]
  510. st %o2,[%i1+8]
  511. st %o3,[%i1+12]
  512. ret
  513. restore
  514. .align 32
  515. .Lunaligned_enc:
  516. ldub [%i0+0],%l0 ! build each 32-bit word from four bytes, first byte in the top position
  517. ldub [%i0+1],%l1
  518. ldub [%i0+2],%l2
  519. sll %l0,24,%l0
  520. ldub [%i0+3],%l3
  521. sll %l1,16,%l1
  522. ldub [%i0+4],%l4
  523. sll %l2,8,%l2
  524. or %l1,%l0,%l0
  525. ldub [%i0+5],%l5
  526. sll %l4,24,%l4
  527. or %l3,%l2,%l2
  528. ldub [%i0+6],%l6
  529. sll %l5,16,%l5
  530. or %l0,%l2,%o0 ! first state word complete
  531. ldub [%i0+7],%l7
  532. sll %l6,8,%l6
  533. or %l5,%l4,%l4
  534. ldub [%i0+8],%l0
  535. or %l7,%l6,%l6
  536. ldub [%i0+9],%l1
  537. or %l4,%l6,%o1
  538. ldub [%i0+10],%l2
  539. sll %l0,24,%l0
  540. ldub [%i0+11],%l3
  541. sll %l1,16,%l1
  542. ldub [%i0+12],%l4
  543. sll %l2,8,%l2
  544. or %l1,%l0,%l0
  545. ldub [%i0+13],%l5
  546. sll %l4,24,%l4
  547. or %l3,%l2,%l2
  548. ldub [%i0+14],%l6
  549. sll %l5,16,%l5
  550. or %l0,%l2,%o2
  551. ldub [%i0+15],%l7
  552. sll %l6,8,%l6
  553. or %l5,%l4,%l4
  554. or %l7,%l6,%l6
  555. or %l4,%l6,%o3
  556. 1: call .+8 ! same address idiom as on the aligned path
  557. add %o7,AES_Te-1b,%o4
  558. call _sparcv9_AES_encrypt
  559. mov %i2,%o5
  560. srl %o0,24,%l0 ! write each result word out one byte at a time, high byte first
  561. srl %o0,16,%l1
  562. stb %l0,[%i1+0]
  563. srl %o0,8,%l2
  564. stb %l1,[%i1+1]
  565. stb %l2,[%i1+2]
  566. srl %o1,24,%l4
  567. stb %o0,[%i1+3] ! stb stores only the low byte, so the last byte needs no shift
  568. srl %o1,16,%l5
  569. stb %l4,[%i1+4]
  570. srl %o1,8,%l6
  571. stb %l5,[%i1+5]
  572. stb %l6,[%i1+6]
  573. srl %o2,24,%l0
  574. stb %o1,[%i1+7]
  575. srl %o2,16,%l1
  576. stb %l0,[%i1+8]
  577. srl %o2,8,%l2
  578. stb %l1,[%i1+9]
  579. stb %l2,[%i1+10]
  580. srl %o3,24,%l4
  581. stb %o2,[%i1+11]
  582. srl %o3,16,%l5
  583. stb %l4,[%i1+12]
  584. srl %o3,8,%l6
  585. stb %l5,[%i1+13]
  586. stb %l6,[%i1+14]
  587. stb %o3,[%i1+15]
  588. ret
  589. restore
  590. .type AES_encrypt,#function
  591. .size AES_encrypt,(.-AES_encrypt)
  592. ___
  # Start of the decryption table AES_Td, aligned to 256 bytes.  As with
  # AES_Te, the _data_word call that follows emits every table word twice
  # (8 bytes per entry); the .byte table placed after it sits at AES_Td+2048,
  # the offset touched by the "prefetch td4" loads in the decryption routine.
  593. $code.=<<___;
  594. .align 256
  595. AES_Td:
  596. ___
  597. &_data_word(
  598. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  599. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  600. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  601. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  602. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  603. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  604. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  605. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  606. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  607. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  608. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  609. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  610. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  611. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  612. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  613. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  614. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  615. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  616. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  617. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  618. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  619. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  620. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  621. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  622. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  623. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  624. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  625. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  626. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  627. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  628. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  629. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  630. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  631. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  632. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  633. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  634. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  635. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  636. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  637. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  638. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  639. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  640. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  641. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  642. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  643. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  644. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  645. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  646. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  647. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  648. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  649. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  650. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  651. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  652. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  653. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  654. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  655. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  656. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  657. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  658. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  659. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  660. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  661. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  662. $code.=<<___;
  663. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  664. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  665. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  666. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  667. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  668. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  669. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  670. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  671. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  672. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  673. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  674. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  675. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  676. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  677. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  678. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  679. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  680. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  681. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  682. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  683. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  684. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  685. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  686. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  687. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  688. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  689. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  690. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  691. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  692. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  693. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  694. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  695. .type AES_Td,#object
  696. .size AES_Td,(.-AES_Td)
  697. .align 64
  698. .skip 16
  699. _sparcv9_AES_decrypt:
  700. save %sp,-$frame-$locals,%sp
  701. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  702. ld [$key+240],$rounds
  703. ld [$key+0],$t0
  704. ld [$key+4],$t1 !
  705. ld [$key+8],$t2
  706. ld [$key+12],$t3
  707. srl $rounds,1,$rounds
  708. xor $t0,$s0,$s0
  709. ld [$key+16],$t0
  710. xor $t1,$s1,$s1
  711. ld [$key+20],$t1
  712. srl $s0,21,$acc0 !
  713. xor $t2,$s2,$s2
  714. ld [$key+24],$t2
  715. xor $t3,$s3,$s3
  716. and $acc0,2040,$acc0
  717. ld [$key+28],$t3
  718. srl $s3,13,$acc1
  719. nop
  720. .Ldec_loop:
  721. srl $s2,5,$acc2 !
  722. and $acc1,2040,$acc1
  723. ldx [$tbl+$acc0],$acc0
  724. sll $s1,3,$acc3
  725. and $acc2,2040,$acc2
  726. ldx [$tbl+$acc1],$acc1
  727. srl $s1,21,$acc4
  728. and $acc3,2040,$acc3
  729. ldx [$tbl+$acc2],$acc2 !
  730. srl $s0,13,$acc5
  731. and $acc4,2040,$acc4
  732. ldx [$tbl+$acc3],$acc3
  733. srl $s3,5,$acc6
  734. and $acc5,2040,$acc5
  735. ldx [$tbl+$acc4],$acc4
  736. fmovs %f0,%f0
  737. sll $s2,3,$acc7 !
  738. and $acc6,2040,$acc6
  739. ldx [$tbl+$acc5],$acc5
  740. srl $s2,21,$acc8
  741. and $acc7,2040,$acc7
  742. ldx [$tbl+$acc6],$acc6
  743. srl $s1,13,$acc9
  744. and $acc8,2040,$acc8
  745. ldx [$tbl+$acc7],$acc7 !
  746. srl $s0,5,$acc10
  747. and $acc9,2040,$acc9
  748. ldx [$tbl+$acc8],$acc8
  749. sll $s3,3,$acc11
  750. and $acc10,2040,$acc10
  751. ldx [$tbl+$acc9],$acc9
  752. fmovs %f0,%f0
  753. srl $s3,21,$acc12 !
  754. and $acc11,2040,$acc11
  755. ldx [$tbl+$acc10],$acc10
  756. srl $s2,13,$acc13
  757. and $acc12,2040,$acc12
  758. ldx [$tbl+$acc11],$acc11
  759. srl $s1,5,$acc14
  760. and $acc13,2040,$acc13
  761. ldx [$tbl+$acc12],$acc12 !
  762. sll $s0,3,$acc15
  763. and $acc14,2040,$acc14
  764. ldx [$tbl+$acc13],$acc13
  765. and $acc15,2040,$acc15
  766. add $key,32,$key
  767. ldx [$tbl+$acc14],$acc14
  768. fmovs %f0,%f0
  769. subcc $rounds,1,$rounds !
  770. ldx [$tbl+$acc15],$acc15
  771. bz,a,pn %icc,.Ldec_last
  772. add $tbl,2048,$rounds
  773. srlx $acc1,8,$acc1
  774. xor $acc0,$t0,$t0
  775. ld [$key+0],$s0
  776. fmovs %f0,%f0
  777. srlx $acc2,16,$acc2 !
  778. xor $acc1,$t0,$t0
  779. ld [$key+4],$s1
  780. srlx $acc3,24,$acc3
  781. xor $acc2,$t0,$t0
  782. ld [$key+8],$s2
  783. srlx $acc5,8,$acc5
  784. xor $acc3,$t0,$t0
  785. ld [$key+12],$s3 !
  786. srlx $acc6,16,$acc6
  787. xor $acc4,$t1,$t1
  788. fmovs %f0,%f0
  789. srlx $acc7,24,$acc7
  790. xor $acc5,$t1,$t1
  791. srlx $acc9,8,$acc9
  792. xor $acc6,$t1,$t1
  793. srlx $acc10,16,$acc10 !
  794. xor $acc7,$t1,$t1
  795. srlx $acc11,24,$acc11
  796. xor $acc8,$t2,$t2
  797. srlx $acc13,8,$acc13
  798. xor $acc9,$t2,$t2
  799. srlx $acc14,16,$acc14
  800. xor $acc10,$t2,$t2
  801. srlx $acc15,24,$acc15 !
  802. xor $acc11,$t2,$t2
  803. xor $acc12,$acc14,$acc14
  804. xor $acc13,$t3,$t3
  805. srl $t0,21,$acc0
  806. xor $acc14,$t3,$t3
  807. xor $acc15,$t3,$t3
  808. srl $t3,13,$acc1
  809. and $acc0,2040,$acc0 !
  810. srl $t2,5,$acc2
  811. and $acc1,2040,$acc1
  812. ldx [$tbl+$acc0],$acc0
  813. sll $t1,3,$acc3
  814. and $acc2,2040,$acc2
  815. ldx [$tbl+$acc1],$acc1
  816. fmovs %f0,%f0
  817. srl $t1,21,$acc4 !
  818. and $acc3,2040,$acc3
  819. ldx [$tbl+$acc2],$acc2
  820. srl $t0,13,$acc5
  821. and $acc4,2040,$acc4
  822. ldx [$tbl+$acc3],$acc3
  823. srl $t3,5,$acc6
  824. and $acc5,2040,$acc5
  825. ldx [$tbl+$acc4],$acc4 !
  826. sll $t2,3,$acc7
  827. and $acc6,2040,$acc6
  828. ldx [$tbl+$acc5],$acc5
  829. srl $t2,21,$acc8
  830. and $acc7,2040,$acc7
  831. ldx [$tbl+$acc6],$acc6
  832. fmovs %f0,%f0
  833. srl $t1,13,$acc9 !
  834. and $acc8,2040,$acc8
  835. ldx [$tbl+$acc7],$acc7
  836. srl $t0,5,$acc10
  837. and $acc9,2040,$acc9
  838. ldx [$tbl+$acc8],$acc8
  839. sll $t3,3,$acc11
  840. and $acc10,2040,$acc10
  841. ldx [$tbl+$acc9],$acc9 !
  842. srl $t3,21,$acc12
  843. and $acc11,2040,$acc11
  844. ldx [$tbl+$acc10],$acc10
  845. srl $t2,13,$acc13
  846. and $acc12,2040,$acc12
  847. ldx [$tbl+$acc11],$acc11
  848. fmovs %f0,%f0
  849. srl $t1,5,$acc14 !
  850. and $acc13,2040,$acc13
  851. ldx [$tbl+$acc12],$acc12
  852. sll $t0,3,$acc15
  853. and $acc14,2040,$acc14
  854. ldx [$tbl+$acc13],$acc13
  855. srlx $acc1,8,$acc1
  856. and $acc15,2040,$acc15
  857. ldx [$tbl+$acc14],$acc14 !
  858. srlx $acc2,16,$acc2
  859. xor $acc0,$s0,$s0
  860. ldx [$tbl+$acc15],$acc15
  861. srlx $acc3,24,$acc3
  862. xor $acc1,$s0,$s0
  863. ld [$key+16],$t0
  864. fmovs %f0,%f0
  865. srlx $acc5,8,$acc5 !
  866. xor $acc2,$s0,$s0
  867. ld [$key+20],$t1
  868. srlx $acc6,16,$acc6
  869. xor $acc3,$s0,$s0
  870. ld [$key+24],$t2
  871. srlx $acc7,24,$acc7
  872. xor $acc4,$s1,$s1
  873. ld [$key+28],$t3 !
  874. srlx $acc9,8,$acc9
  875. xor $acc5,$s1,$s1
  876. ldx [$tbl+2048+0],%g0 ! prefetch td4
  877. srlx $acc10,16,$acc10
  878. xor $acc6,$s1,$s1
  879. ldx [$tbl+2048+32],%g0 ! prefetch td4
  880. srlx $acc11,24,$acc11
  881. xor $acc7,$s1,$s1
  882. ldx [$tbl+2048+64],%g0 ! prefetch td4
  883. srlx $acc13,8,$acc13
  884. xor $acc8,$s2,$s2
  885. ldx [$tbl+2048+96],%g0 ! prefetch td4
  886. srlx $acc14,16,$acc14 !
  887. xor $acc9,$s2,$s2
  888. ldx [$tbl+2048+128],%g0 ! prefetch td4
  889. srlx $acc15,24,$acc15
  890. xor $acc10,$s2,$s2
  891. ldx [$tbl+2048+160],%g0 ! prefetch td4
  892. srl $s0,21,$acc0
  893. xor $acc11,$s2,$s2
  894. ldx [$tbl+2048+192],%g0 ! prefetch td4
  895. xor $acc12,$acc14,$acc14
  896. xor $acc13,$s3,$s3
  897. ldx [$tbl+2048+224],%g0 ! prefetch td4
  898. and $acc0,2040,$acc0 !
  899. xor $acc14,$s3,$s3
  900. xor $acc15,$s3,$s3
  901. ba .Ldec_loop
  902. srl $s3,13,$acc1
  903. .align 32
  904. .Ldec_last:
  905. srlx $acc1,8,$acc1 !
  906. xor $acc0,$t0,$t0
  907. ld [$key+0],$s0
  908. srlx $acc2,16,$acc2
  909. xor $acc1,$t0,$t0
  910. ld [$key+4],$s1
  911. srlx $acc3,24,$acc3
  912. xor $acc2,$t0,$t0
  913. ld [$key+8],$s2 !
  914. srlx $acc5,8,$acc5
  915. xor $acc3,$t0,$t0
  916. ld [$key+12],$s3
  917. srlx $acc6,16,$acc6
  918. xor $acc4,$t1,$t1
  919. srlx $acc7,24,$acc7
  920. xor $acc5,$t1,$t1
  921. srlx $acc9,8,$acc9 !
  922. xor $acc6,$t1,$t1
  923. srlx $acc10,16,$acc10
  924. xor $acc7,$t1,$t1
  925. srlx $acc11,24,$acc11
  926. xor $acc8,$t2,$t2
  927. srlx $acc13,8,$acc13
  928. xor $acc9,$t2,$t2
  929. srlx $acc14,16,$acc14 !
  930. xor $acc10,$t2,$t2
  931. srlx $acc15,24,$acc15
  932. xor $acc11,$t2,$t2
  933. xor $acc12,$acc14,$acc14
  934. xor $acc13,$t3,$t3
  935. srl $t0,24,$acc0
  936. xor $acc14,$t3,$t3
  937. xor $acc15,$t3,$t3 !
  938. srl $t3,16,$acc1
  939. srl $t2,8,$acc2
  940. and $acc1,255,$acc1
  941. ldub [$rounds+$acc0],$acc0
  942. srl $t1,24,$acc4
  943. and $acc2,255,$acc2
  944. ldub [$rounds+$acc1],$acc1
  945. srl $t0,16,$acc5 !
  946. and $t1,255,$acc3
  947. ldub [$rounds+$acc2],$acc2
  948. ldub [$rounds+$acc3],$acc3
  949. srl $t3,8,$acc6
  950. and $acc5,255,$acc5
  951. ldub [$rounds+$acc4],$acc4
  952. fmovs %f0,%f0
  953. srl $t2,24,$acc8 !
  954. and $acc6,255,$acc6
  955. ldub [$rounds+$acc5],$acc5
  956. srl $t1,16,$acc9
  957. and $t2,255,$acc7
  958. ldub [$rounds+$acc6],$acc6
  959. ldub [$rounds+$acc7],$acc7
  960. fmovs %f0,%f0
  961. srl $t0,8,$acc10 !
  962. and $acc9,255,$acc9
  963. ldub [$rounds+$acc8],$acc8
  964. srl $t3,24,$acc12
  965. and $acc10,255,$acc10
  966. ldub [$rounds+$acc9],$acc9
  967. srl $t2,16,$acc13
  968. and $t3,255,$acc11
  969. ldub [$rounds+$acc10],$acc10 !
  970. srl $t1,8,$acc14
  971. and $acc13,255,$acc13
  972. ldub [$rounds+$acc11],$acc11
  973. ldub [$rounds+$acc12],$acc12
  974. and $acc14,255,$acc14
  975. ldub [$rounds+$acc13],$acc13
  976. and $t0,255,$acc15
  977. ldub [$rounds+$acc14],$acc14 !
  978. sll $acc0,24,$acc0
  979. xor $acc3,$s0,$s0
  980. ldub [$rounds+$acc15],$acc15
  981. sll $acc1,16,$acc1
  982. xor $acc0,$s0,$s0
  983. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  984. fmovs %f0,%f0
  985. sll $acc2,8,$acc2 !
  986. xor $acc1,$s0,$s0
  987. sll $acc4,24,$acc4
  988. xor $acc2,$s0,$s0
  989. sll $acc5,16,$acc5
  990. xor $acc7,$s1,$s1
  991. sll $acc6,8,$acc6
  992. xor $acc4,$s1,$s1
  993. sll $acc8,24,$acc8 !
  994. xor $acc5,$s1,$s1
  995. sll $acc9,16,$acc9
  996. xor $acc11,$s2,$s2
  997. sll $acc10,8,$acc10
  998. xor $acc6,$s1,$s1
  999. sll $acc12,24,$acc12
  1000. xor $acc8,$s2,$s2
  1001. sll $acc13,16,$acc13 !
  1002. xor $acc9,$s2,$s2
  1003. sll $acc14,8,$acc14
  1004. xor $acc10,$s2,$s2
  1005. xor $acc12,$acc14,$acc14
  1006. xor $acc13,$s3,$s3
  1007. xor $acc14,$s3,$s3
  1008. xor $acc15,$s3,$s3
  1009. ret
  1010. restore
  1011. .type _sparcv9_AES_decrypt,#function
  1012. .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
  ! AES_decrypt: exported entry point that runs one 16-byte block through
  ! _sparcv9_AES_decrypt.
  !   %o0  source pointer (16 bytes are read from it)
  !   %o1  destination pointer (16 bytes are written to it)
  !   %o2  third argument, handed unchanged to the helper in %o5
  !        (presumably the key schedule; confirm against the C prototype)
  ! The four 32-bit state words travel to the helper and back in %o0-%o3,
  ! and the address of AES_Td is supplied in %o4.
  ! When either pointer is not 4-byte aligned the block is moved with byte
  ! loads and stores, most significant byte first, instead of word accesses.
  1013. .align 32
  1014. .globl AES_decrypt
  1015. AES_decrypt:
  ! OR both pointers together so a single test of the low two bits
  ! detects misalignment in either of them.
  1016. or %o0,%o1,%g1
  1017. andcc %g1,3,%g0
  1018. bnz,pn %xcc,.Lunaligned_dec
  ! Branch delay slot: the new register window is opened on both paths,
  ! so the arguments are addressed as %i0-%i2 from here on.
  1019. save %sp,-$frame,%sp
  ! Aligned path: fetch the block as four 32-bit words.
  1020. ld [%i0+0],%o0
  1021. ld [%i0+4],%o1
  1022. ld [%i0+8],%o2
  1023. ld [%i0+12],%o3
  ! The call to .+8 merely lands on the instruction after its delay slot
  ! while leaving its own address in %o7; the add in the delay slot turns
  ! that into the address of AES_Td without an absolute relocation.
  1024. 1: call .+8
  1025. add %o7,AES_Td-1b,%o4
  1026. call _sparcv9_AES_decrypt
  ! Delay slot of the call: pass the third argument in %o5.
  1027. mov %i2,%o5
  ! Write the four result words to the destination.
  1028. st %o0,[%i1+0]
  1029. st %o1,[%i1+4]
  1030. st %o2,[%i1+8]
  1031. st %o3,[%i1+12]
  ! restore sits in the delay slot of ret and releases the register window.
  1032. ret
  1033. restore
  1034. .align 32
  ! Unaligned path: build each state word from four single bytes, first
  ! byte into bits 31..24, using only byte loads on the source buffer.
  ! The loads are interleaved with the shifts and ors that combine them.
  1035. .Lunaligned_dec:
  1036. ldub [%i0+0],%l0
  1037. ldub [%i0+1],%l1
  1038. ldub [%i0+2],%l2
  1039. sll %l0,24,%l0
  1040. ldub [%i0+3],%l3
  1041. sll %l1,16,%l1
  1042. ldub [%i0+4],%l4
  1043. sll %l2,8,%l2
  1044. or %l1,%l0,%l0
  1045. ldub [%i0+5],%l5
  1046. sll %l4,24,%l4
  1047. or %l3,%l2,%l2
  1048. ldub [%i0+6],%l6
  1049. sll %l5,16,%l5
  ! %o0 = bytes 0-3
  1050. or %l0,%l2,%o0
  1051. ldub [%i0+7],%l7
  1052. sll %l6,8,%l6
  1053. or %l5,%l4,%l4
  1054. ldub [%i0+8],%l0
  1055. or %l7,%l6,%l6
  1056. ldub [%i0+9],%l1
  ! %o1 = bytes 4-7
  1057. or %l4,%l6,%o1
  1058. ldub [%i0+10],%l2
  1059. sll %l0,24,%l0
  1060. ldub [%i0+11],%l3
  1061. sll %l1,16,%l1
  1062. ldub [%i0+12],%l4
  1063. sll %l2,8,%l2
  1064. or %l1,%l0,%l0
  1065. ldub [%i0+13],%l5
  1066. sll %l4,24,%l4
  1067. or %l3,%l2,%l2
  1068. ldub [%i0+14],%l6
  1069. sll %l5,16,%l5
  ! %o2 = bytes 8-11
  1070. or %l0,%l2,%o2
  1071. ldub [%i0+15],%l7
  1072. sll %l6,8,%l6
  1073. or %l5,%l4,%l4
  1074. or %l7,%l6,%l6
  ! %o3 = bytes 12-15
  1075. or %l4,%l6,%o3
  ! Same position-independent AES_Td lookup and helper call as on the
  ! aligned path.
  1076. 1: call .+8
  1077. add %o7,AES_Td-1b,%o4
  1078. call _sparcv9_AES_decrypt
  ! Delay slot of the call: pass the third argument in %o5.
  1079. mov %i2,%o5
  ! Scatter the result one byte at a time.  stb writes only the low eight
  ! bits of its source register, so the upper three bytes of each word are
  ! shifted down first and the unshifted word supplies the last byte.
  1080. srl %o0,24,%l0
  1081. srl %o0,16,%l1
  1082. stb %l0,[%i1+0]
  1083. srl %o0,8,%l2
  1084. stb %l1,[%i1+1]
  1085. stb %l2,[%i1+2]
  1086. srl %o1,24,%l4
  1087. stb %o0,[%i1+3]
  1088. srl %o1,16,%l5
  1089. stb %l4,[%i1+4]
  1090. srl %o1,8,%l6
  1091. stb %l5,[%i1+5]
  1092. stb %l6,[%i1+6]
  1093. srl %o2,24,%l0
  1094. stb %o1,[%i1+7]
  1095. srl %o2,16,%l1
  1096. stb %l0,[%i1+8]
  1097. srl %o2,8,%l2
  1098. stb %l1,[%i1+9]
  1099. stb %l2,[%i1+10]
  1100. srl %o3,24,%l4
  1101. stb %o2,[%i1+11]
  1102. srl %o3,16,%l5
  1103. stb %l4,[%i1+12]
  1104. srl %o3,8,%l6
  1105. stb %l5,[%i1+13]
  1106. stb %l6,[%i1+14]
  1107. stb %o3,[%i1+15]
  ! restore sits in the delay slot of ret and releases the register window.
  1108. ret
  1109. restore
  1110. .type AES_decrypt,#function
  1111. .size AES_decrypt,(.-AES_decrypt)
  1112. ___
  1113. # The fmovs instructions act as FP nops and were originally added to
  1114. # meet specific instruction alignment requirements and so maximize ILP.
  1115. # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
  1116. # an undesired effect, so just omit them and sacrifice a fraction of a
  1117. # percent in performance...
  1118. $code =~ s/fmovs.*$//gm;
  1119. print $code;
  # close is where buffered write errors (full disk, broken pipe) surface.
  # Abort on failure so a truncated assembly file is never mistaken for a
  # successful run.
  1120. close STDOUT or die "error closing STDOUT: $!"; # ensure flush