aes-sparcv9.pl 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191
  1. #! /usr/bin/env perl
  2. # Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. Rights for redistribution and usage in source and binary
  12. # forms are granted according to the License.
  13. # ====================================================================
  14. #
  15. # Version 1.1
  16. #
  17. # The main motivation for this effort was to mitigate the hazard of
  18. # cache-timing attacks. This is [currently and initially!] addressed in
  19. # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
  20. # 2. References to them are scheduled for L2 cache latency, meaning
  21. # that the tables don't have to reside in L1 cache. Once again, this
  22. # is an initial draft and one should expect more countermeasures to
  23. # be implemented...
  24. #
  25. # Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
  26. # round.
  27. #
  28. # Even though performance was not the primary goal [on the contrary,
  29. # extra shifts "induced" by compressed S-box and longer loop epilogue
  30. # "induced" by scheduling for L2 have negative effect on performance],
  31. # the code turned out to run in ~23 cycles per processed byte en-/
  32. # decrypted with a 128-bit key. This is a pretty good result for code
  33. # with the mentioned qualities on an UltraSPARC core. Compared to Sun C
  34. # generated code, my encrypt procedure runs just a few percent faster,
  35. # while the decrypt one is a whole 50% faster [yes, Sun C failed to
  36. # generate an optimal decrypt procedure]. Compared to GNU C generated
  37. # code, both procedures are more than 60% faster:-)
  # ---------------------------------------------------------------------
  # Generator configuration.
  #
  # The last command-line argument, when present and true, names the file
  # the generated assembly is written to: STDOUT is re-opened onto it.
  # The three-argument form of open keeps the name from being parsed for
  # mode characters, and a failed open aborts the run instead of silently
  # producing no output.
  # ---------------------------------------------------------------------
  $output = pop;
  if ($output) {
      open(STDOUT, ">", $output) or die "can't open $output: $!";
  }

  # Symbolic frame size and stack bias; both are substituted verbatim into
  # the emitted assembly (the file includes "sparc_arch.h" ahead of them).
  $frame="STACK_FRAME";
  $bias="STACK_BIAS";
  # Extra bytes reserved in the core routine's frame; the return address is
  # stored at offset $frame+0 of that area.
  $locals=16;

  # Sixteen accumulators that receive the table look-ups of one round
  # (four per state word).  They are spread over %l, %o and %g registers.
  $acc0="%l0";
  $acc1="%o0";
  $acc2="%o1";
  $acc3="%o2";
  $acc4="%l1";
  $acc5="%o3";
  $acc6="%o4";
  $acc7="%o5";
  $acc8="%l2";
  $acc9="%o7";
  $acc10="%g1";
  $acc11="%g2";
  $acc12="%l3";
  $acc13="%g3";
  $acc14="%g4";
  $acc15="%g5";

  # Second copy of the state: rounds alternate between $s* and $t*.
  $t0="%l4";
  $t1="%l5";
  $t2="%l6";
  $t3="%l7";

  # State words as seen by the core routine after its `save`.
  $s0="%i0";
  $s1="%i1";
  $s2="%i2";
  $s3="%i3";

  $tbl="%i4";     # base address of the lookup table
  $key="%i5";     # key schedule pointer, advanced by 32 per loop iteration
  $rounds="%i7";  # aliases with return address, which is off-loaded to stack
  # _data_word(LIST)
  #
  # Appends one ".long w,w" line to the global $code for every word in LIST,
  # i.e. each 32-bit value is emitted twice in a row.  Emission stops at the
  # first undefined element.  Nothing meaningful is returned; the result is
  # the side effect on $code.
  #
  # The sub deliberately carries no prototype: it takes a list, and an empty
  # "()" prototype would reject arguments for any call made without "&".
  sub _data_word
  {   foreach my $word (@_) {
          last unless defined $word;
          $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
      }
  }
  # Assembly preamble.  Pulls in sparc_arch.h, declares %g2 and %g3 as
  # scratch when __arch64__ is defined (both are used as accumulators, see
  # $acc11 and $acc13), switches to the .text section and opens the
  # 256-byte-aligned AES_Te label.  The table contents themselves are
  # appended by the statements that follow this heredoc.
  73. $code.=<<___;
  74. #include "sparc_arch.h"
  75. #ifdef __arch64__
  76. .register %g2,#scratch
  77. .register %g3,#scratch
  78. #endif
  79. .section ".text",#alloc,#execinstr
  80. .align 256
  81. AES_Te:
  82. ___
  # Word part of the AES_Te table: 256 32-bit constants.  _data_word emits
  # every constant twice, so each entry occupies 8 bytes and the whole run
  # is 2048 bytes long.  The round code addresses an entry as
  # (byte << 3) & 2040 and fetches it with a 64-bit ldx; because both
  # halves of the fetched value are the same word, a following srlx by
  # 8, 16 or 24 leaves a rotated copy of that word in the low 32 bits.
  83. &_data_word(
  84. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  85. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  86. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  87. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  88. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  89. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  90. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  91. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  92. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  93. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  94. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  95. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  96. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  97. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  98. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  99. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  100. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  101. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  102. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  103. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  104. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  105. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  106. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  107. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  108. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  109. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  110. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  111. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  112. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  113. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  114. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  115. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  116. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  117. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  118. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  119. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  120. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  121. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  122. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  123. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  124. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  125. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  126. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  127. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  128. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  129. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  130. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  131. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  132. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  133. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  134. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  135. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  136. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  137. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  138. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  139. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  140. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  141. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  142. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  143. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  144. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  145. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  146. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  147. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  # This heredoc emits, in order:
  #   1. a 256-entry .byte table placed directly behind the 2048-byte word
  #      table at AES_Te (the code refers to it as "te4" and reaches it as
  #      $tbl+2048), followed by the .type/.size of AES_Te;
  #   2. _sparcv9_AES_encrypt, the register-based encryption core;
  #   3. AES_encrypt, the global entry point that wraps the core.
  #
  # _sparcv9_AES_encrypt (register names are those valid after its `save`)
  #   in:   $s0-$s3 = four 32-bit state words, $tbl = table base,
  #         $key = key schedule; the round count is read from [$key+240].
  #   out:  $s0-$s3 = result words; AES_encrypt picks them up as %o0-%o3
  #         after the `restore`.
  #   $rounds lives in %i7, so the return address is stored in the frame on
  #   entry and reloaded before `ret`.  The round count is halved because
  #   one .Lenc_loop iteration performs two table rounds ($s -> $t, then
  #   $t -> $s) and advances $key by 32 bytes.  Indices are formed as
  #   (byte << 3) & 2040 and fetched with ldx; srlx by 8/16/24 lines the
  #   fetched entry up before it is xor-ed into the state.  While the
  #   second half-round completes, eight dummy loads to %g0 touch the byte
  #   table at 32-byte steps ("prefetch te4").  When the counter reaches
  #   zero, `bz,a` jumps to .Lenc_last and its annulled delay slot (executed
  #   only when the branch is taken) turns $rounds into the address of the
  #   byte table; the last round then uses ldub look-ups and shifts each
  #   byte back into position with sll.
  #   NOTE(review): the `fmovs %f0,%f0` instructions copy a register onto
  #   itself and the trailing `!` markers carry no text; both look like
  #   instruction-scheduling aids -- confirm before touching them.
  #
  # AES_encrypt (global)
  #   in:   %o0 = buffer the block is read from, %o1 = buffer the result is
  #         written to, %o2 = value handed to the core as $key
  #         (presumably the usual in/out/key C arguments -- verify against
  #         the C prototype).
  #   If either buffer address has one of its two low bits set, the code
  #   branches to .Lunaligned_enc, which builds each word from four ldub
  #   loads (most significant byte first) and writes the result with stb;
  #   otherwise 32-bit ld/st are used.  The `save` sits in the delay slot of
  #   the non-annulled branch, so it is executed on both paths.
  #   `1: call .+8` followed by `add %o7,AES_Te-1b,%o4` derives the address
  #   of AES_Te from the address of the call instruction itself.
  148. $code.=<<___;
  149. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  150. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  151. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  152. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  153. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  154. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  155. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  156. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  157. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  158. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  159. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  160. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  161. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  162. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  163. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  164. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  165. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  166. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  167. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  168. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  169. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  170. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  171. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  172. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  173. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  174. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  175. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  176. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  177. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  178. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  179. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  180. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  181. .type AES_Te,#object
  182. .size AES_Te,(.-AES_Te)
  183. .align 64
  184. .skip 16
  185. _sparcv9_AES_encrypt:
  186. save %sp,-$frame-$locals,%sp
  187. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  188. ld [$key+240],$rounds
  189. ld [$key+0],$t0
  190. ld [$key+4],$t1 !
  191. ld [$key+8],$t2
  192. srl $rounds,1,$rounds
  193. xor $t0,$s0,$s0
  194. ld [$key+12],$t3
  195. srl $s0,21,$acc0
  196. xor $t1,$s1,$s1
  197. ld [$key+16],$t0
  198. srl $s1,13,$acc1 !
  199. xor $t2,$s2,$s2
  200. ld [$key+20],$t1
  201. xor $t3,$s3,$s3
  202. ld [$key+24],$t2
  203. and $acc0,2040,$acc0
  204. ld [$key+28],$t3
  205. nop
  206. .Lenc_loop:
  207. srl $s2,5,$acc2 !
  208. and $acc1,2040,$acc1
  209. ldx [$tbl+$acc0],$acc0
  210. sll $s3,3,$acc3
  211. and $acc2,2040,$acc2
  212. ldx [$tbl+$acc1],$acc1
  213. srl $s1,21,$acc4
  214. and $acc3,2040,$acc3
  215. ldx [$tbl+$acc2],$acc2 !
  216. srl $s2,13,$acc5
  217. and $acc4,2040,$acc4
  218. ldx [$tbl+$acc3],$acc3
  219. srl $s3,5,$acc6
  220. and $acc5,2040,$acc5
  221. ldx [$tbl+$acc4],$acc4
  222. fmovs %f0,%f0
  223. sll $s0,3,$acc7 !
  224. and $acc6,2040,$acc6
  225. ldx [$tbl+$acc5],$acc5
  226. srl $s2,21,$acc8
  227. and $acc7,2040,$acc7
  228. ldx [$tbl+$acc6],$acc6
  229. srl $s3,13,$acc9
  230. and $acc8,2040,$acc8
  231. ldx [$tbl+$acc7],$acc7 !
  232. srl $s0,5,$acc10
  233. and $acc9,2040,$acc9
  234. ldx [$tbl+$acc8],$acc8
  235. sll $s1,3,$acc11
  236. and $acc10,2040,$acc10
  237. ldx [$tbl+$acc9],$acc9
  238. fmovs %f0,%f0
  239. srl $s3,21,$acc12 !
  240. and $acc11,2040,$acc11
  241. ldx [$tbl+$acc10],$acc10
  242. srl $s0,13,$acc13
  243. and $acc12,2040,$acc12
  244. ldx [$tbl+$acc11],$acc11
  245. srl $s1,5,$acc14
  246. and $acc13,2040,$acc13
  247. ldx [$tbl+$acc12],$acc12 !
  248. sll $s2,3,$acc15
  249. and $acc14,2040,$acc14
  250. ldx [$tbl+$acc13],$acc13
  251. and $acc15,2040,$acc15
  252. add $key,32,$key
  253. ldx [$tbl+$acc14],$acc14
  254. fmovs %f0,%f0
  255. subcc $rounds,1,$rounds !
  256. ldx [$tbl+$acc15],$acc15
  257. bz,a,pn %icc,.Lenc_last
  258. add $tbl,2048,$rounds
  259. srlx $acc1,8,$acc1
  260. xor $acc0,$t0,$t0
  261. ld [$key+0],$s0
  262. fmovs %f0,%f0
  263. srlx $acc2,16,$acc2 !
  264. xor $acc1,$t0,$t0
  265. ld [$key+4],$s1
  266. srlx $acc3,24,$acc3
  267. xor $acc2,$t0,$t0
  268. ld [$key+8],$s2
  269. srlx $acc5,8,$acc5
  270. xor $acc3,$t0,$t0
  271. ld [$key+12],$s3 !
  272. srlx $acc6,16,$acc6
  273. xor $acc4,$t1,$t1
  274. fmovs %f0,%f0
  275. srlx $acc7,24,$acc7
  276. xor $acc5,$t1,$t1
  277. srlx $acc9,8,$acc9
  278. xor $acc6,$t1,$t1
  279. srlx $acc10,16,$acc10 !
  280. xor $acc7,$t1,$t1
  281. srlx $acc11,24,$acc11
  282. xor $acc8,$t2,$t2
  283. srlx $acc13,8,$acc13
  284. xor $acc9,$t2,$t2
  285. srlx $acc14,16,$acc14
  286. xor $acc10,$t2,$t2
  287. srlx $acc15,24,$acc15 !
  288. xor $acc11,$t2,$t2
  289. xor $acc12,$acc14,$acc14
  290. xor $acc13,$t3,$t3
  291. srl $t0,21,$acc0
  292. xor $acc14,$t3,$t3
  293. srl $t1,13,$acc1
  294. xor $acc15,$t3,$t3
  295. and $acc0,2040,$acc0 !
  296. srl $t2,5,$acc2
  297. and $acc1,2040,$acc1
  298. ldx [$tbl+$acc0],$acc0
  299. sll $t3,3,$acc3
  300. and $acc2,2040,$acc2
  301. ldx [$tbl+$acc1],$acc1
  302. fmovs %f0,%f0
  303. srl $t1,21,$acc4 !
  304. and $acc3,2040,$acc3
  305. ldx [$tbl+$acc2],$acc2
  306. srl $t2,13,$acc5
  307. and $acc4,2040,$acc4
  308. ldx [$tbl+$acc3],$acc3
  309. srl $t3,5,$acc6
  310. and $acc5,2040,$acc5
  311. ldx [$tbl+$acc4],$acc4 !
  312. sll $t0,3,$acc7
  313. and $acc6,2040,$acc6
  314. ldx [$tbl+$acc5],$acc5
  315. srl $t2,21,$acc8
  316. and $acc7,2040,$acc7
  317. ldx [$tbl+$acc6],$acc6
  318. fmovs %f0,%f0
  319. srl $t3,13,$acc9 !
  320. and $acc8,2040,$acc8
  321. ldx [$tbl+$acc7],$acc7
  322. srl $t0,5,$acc10
  323. and $acc9,2040,$acc9
  324. ldx [$tbl+$acc8],$acc8
  325. sll $t1,3,$acc11
  326. and $acc10,2040,$acc10
  327. ldx [$tbl+$acc9],$acc9 !
  328. srl $t3,21,$acc12
  329. and $acc11,2040,$acc11
  330. ldx [$tbl+$acc10],$acc10
  331. srl $t0,13,$acc13
  332. and $acc12,2040,$acc12
  333. ldx [$tbl+$acc11],$acc11
  334. fmovs %f0,%f0
  335. srl $t1,5,$acc14 !
  336. and $acc13,2040,$acc13
  337. ldx [$tbl+$acc12],$acc12
  338. sll $t2,3,$acc15
  339. and $acc14,2040,$acc14
  340. ldx [$tbl+$acc13],$acc13
  341. srlx $acc1,8,$acc1
  342. and $acc15,2040,$acc15
  343. ldx [$tbl+$acc14],$acc14 !
  344. srlx $acc2,16,$acc2
  345. xor $acc0,$s0,$s0
  346. ldx [$tbl+$acc15],$acc15
  347. srlx $acc3,24,$acc3
  348. xor $acc1,$s0,$s0
  349. ld [$key+16],$t0
  350. fmovs %f0,%f0
  351. srlx $acc5,8,$acc5 !
  352. xor $acc2,$s0,$s0
  353. ld [$key+20],$t1
  354. srlx $acc6,16,$acc6
  355. xor $acc3,$s0,$s0
  356. ld [$key+24],$t2
  357. srlx $acc7,24,$acc7
  358. xor $acc4,$s1,$s1
  359. ld [$key+28],$t3 !
  360. srlx $acc9,8,$acc9
  361. xor $acc5,$s1,$s1
  362. ldx [$tbl+2048+0],%g0 ! prefetch te4
  363. srlx $acc10,16,$acc10
  364. xor $acc6,$s1,$s1
  365. ldx [$tbl+2048+32],%g0 ! prefetch te4
  366. srlx $acc11,24,$acc11
  367. xor $acc7,$s1,$s1
  368. ldx [$tbl+2048+64],%g0 ! prefetch te4
  369. srlx $acc13,8,$acc13
  370. xor $acc8,$s2,$s2
  371. ldx [$tbl+2048+96],%g0 ! prefetch te4
  372. srlx $acc14,16,$acc14 !
  373. xor $acc9,$s2,$s2
  374. ldx [$tbl+2048+128],%g0 ! prefetch te4
  375. srlx $acc15,24,$acc15
  376. xor $acc10,$s2,$s2
  377. ldx [$tbl+2048+160],%g0 ! prefetch te4
  378. srl $s0,21,$acc0
  379. xor $acc11,$s2,$s2
  380. ldx [$tbl+2048+192],%g0 ! prefetch te4
  381. xor $acc12,$acc14,$acc14
  382. xor $acc13,$s3,$s3
  383. ldx [$tbl+2048+224],%g0 ! prefetch te4
  384. srl $s1,13,$acc1 !
  385. xor $acc14,$s3,$s3
  386. xor $acc15,$s3,$s3
  387. ba .Lenc_loop
  388. and $acc0,2040,$acc0
  389. .align 32
  390. .Lenc_last:
  391. srlx $acc1,8,$acc1 !
  392. xor $acc0,$t0,$t0
  393. ld [$key+0],$s0
  394. srlx $acc2,16,$acc2
  395. xor $acc1,$t0,$t0
  396. ld [$key+4],$s1
  397. srlx $acc3,24,$acc3
  398. xor $acc2,$t0,$t0
  399. ld [$key+8],$s2 !
  400. srlx $acc5,8,$acc5
  401. xor $acc3,$t0,$t0
  402. ld [$key+12],$s3
  403. srlx $acc6,16,$acc6
  404. xor $acc4,$t1,$t1
  405. srlx $acc7,24,$acc7
  406. xor $acc5,$t1,$t1
  407. srlx $acc9,8,$acc9 !
  408. xor $acc6,$t1,$t1
  409. srlx $acc10,16,$acc10
  410. xor $acc7,$t1,$t1
  411. srlx $acc11,24,$acc11
  412. xor $acc8,$t2,$t2
  413. srlx $acc13,8,$acc13
  414. xor $acc9,$t2,$t2
  415. srlx $acc14,16,$acc14 !
  416. xor $acc10,$t2,$t2
  417. srlx $acc15,24,$acc15
  418. xor $acc11,$t2,$t2
  419. xor $acc12,$acc14,$acc14
  420. xor $acc13,$t3,$t3
  421. srl $t0,24,$acc0
  422. xor $acc14,$t3,$t3
  423. srl $t1,16,$acc1 !
  424. xor $acc15,$t3,$t3
  425. srl $t2,8,$acc2
  426. and $acc1,255,$acc1
  427. ldub [$rounds+$acc0],$acc0
  428. srl $t1,24,$acc4
  429. and $acc2,255,$acc2
  430. ldub [$rounds+$acc1],$acc1
  431. srl $t2,16,$acc5 !
  432. and $t3,255,$acc3
  433. ldub [$rounds+$acc2],$acc2
  434. ldub [$rounds+$acc3],$acc3
  435. srl $t3,8,$acc6
  436. and $acc5,255,$acc5
  437. ldub [$rounds+$acc4],$acc4
  438. fmovs %f0,%f0
  439. srl $t2,24,$acc8 !
  440. and $acc6,255,$acc6
  441. ldub [$rounds+$acc5],$acc5
  442. srl $t3,16,$acc9
  443. and $t0,255,$acc7
  444. ldub [$rounds+$acc6],$acc6
  445. ldub [$rounds+$acc7],$acc7
  446. fmovs %f0,%f0
  447. srl $t0,8,$acc10 !
  448. and $acc9,255,$acc9
  449. ldub [$rounds+$acc8],$acc8
  450. srl $t3,24,$acc12
  451. and $acc10,255,$acc10
  452. ldub [$rounds+$acc9],$acc9
  453. srl $t0,16,$acc13
  454. and $t1,255,$acc11
  455. ldub [$rounds+$acc10],$acc10 !
  456. srl $t1,8,$acc14
  457. and $acc13,255,$acc13
  458. ldub [$rounds+$acc11],$acc11
  459. ldub [$rounds+$acc12],$acc12
  460. and $acc14,255,$acc14
  461. ldub [$rounds+$acc13],$acc13
  462. and $t2,255,$acc15
  463. ldub [$rounds+$acc14],$acc14 !
  464. sll $acc0,24,$acc0
  465. xor $acc3,$s0,$s0
  466. ldub [$rounds+$acc15],$acc15
  467. sll $acc1,16,$acc1
  468. xor $acc0,$s0,$s0
  469. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  470. fmovs %f0,%f0
  471. sll $acc2,8,$acc2 !
  472. xor $acc1,$s0,$s0
  473. sll $acc4,24,$acc4
  474. xor $acc2,$s0,$s0
  475. sll $acc5,16,$acc5
  476. xor $acc7,$s1,$s1
  477. sll $acc6,8,$acc6
  478. xor $acc4,$s1,$s1
  479. sll $acc8,24,$acc8 !
  480. xor $acc5,$s1,$s1
  481. sll $acc9,16,$acc9
  482. xor $acc11,$s2,$s2
  483. sll $acc10,8,$acc10
  484. xor $acc6,$s1,$s1
  485. sll $acc12,24,$acc12
  486. xor $acc8,$s2,$s2
  487. sll $acc13,16,$acc13 !
  488. xor $acc9,$s2,$s2
  489. sll $acc14,8,$acc14
  490. xor $acc10,$s2,$s2
  491. xor $acc12,$acc14,$acc14
  492. xor $acc13,$s3,$s3
  493. xor $acc14,$s3,$s3
  494. xor $acc15,$s3,$s3
  495. ret
  496. restore
  497. .type _sparcv9_AES_encrypt,#function
  498. .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
  499. .align 32
  500. .globl AES_encrypt
  501. AES_encrypt:
  502. or %o0,%o1,%g1
  503. andcc %g1,3,%g0
  504. bnz,pn %xcc,.Lunaligned_enc
  505. save %sp,-$frame,%sp
  506. ld [%i0+0],%o0
  507. ld [%i0+4],%o1
  508. ld [%i0+8],%o2
  509. ld [%i0+12],%o3
  510. 1: call .+8
  511. add %o7,AES_Te-1b,%o4
  512. call _sparcv9_AES_encrypt
  513. mov %i2,%o5
  514. st %o0,[%i1+0]
  515. st %o1,[%i1+4]
  516. st %o2,[%i1+8]
  517. st %o3,[%i1+12]
  518. ret
  519. restore
  520. .align 32
  521. .Lunaligned_enc:
  522. ldub [%i0+0],%l0
  523. ldub [%i0+1],%l1
  524. ldub [%i0+2],%l2
  525. sll %l0,24,%l0
  526. ldub [%i0+3],%l3
  527. sll %l1,16,%l1
  528. ldub [%i0+4],%l4
  529. sll %l2,8,%l2
  530. or %l1,%l0,%l0
  531. ldub [%i0+5],%l5
  532. sll %l4,24,%l4
  533. or %l3,%l2,%l2
  534. ldub [%i0+6],%l6
  535. sll %l5,16,%l5
  536. or %l0,%l2,%o0
  537. ldub [%i0+7],%l7
  538. sll %l6,8,%l6
  539. or %l5,%l4,%l4
  540. ldub [%i0+8],%l0
  541. or %l7,%l6,%l6
  542. ldub [%i0+9],%l1
  543. or %l4,%l6,%o1
  544. ldub [%i0+10],%l2
  545. sll %l0,24,%l0
  546. ldub [%i0+11],%l3
  547. sll %l1,16,%l1
  548. ldub [%i0+12],%l4
  549. sll %l2,8,%l2
  550. or %l1,%l0,%l0
  551. ldub [%i0+13],%l5
  552. sll %l4,24,%l4
  553. or %l3,%l2,%l2
  554. ldub [%i0+14],%l6
  555. sll %l5,16,%l5
  556. or %l0,%l2,%o2
  557. ldub [%i0+15],%l7
  558. sll %l6,8,%l6
  559. or %l5,%l4,%l4
  560. or %l7,%l6,%l6
  561. or %l4,%l6,%o3
  562. 1: call .+8
  563. add %o7,AES_Te-1b,%o4
  564. call _sparcv9_AES_encrypt
  565. mov %i2,%o5
  566. srl %o0,24,%l0
  567. srl %o0,16,%l1
  568. stb %l0,[%i1+0]
  569. srl %o0,8,%l2
  570. stb %l1,[%i1+1]
  571. stb %l2,[%i1+2]
  572. srl %o1,24,%l4
  573. stb %o0,[%i1+3]
  574. srl %o1,16,%l5
  575. stb %l4,[%i1+4]
  576. srl %o1,8,%l6
  577. stb %l5,[%i1+5]
  578. stb %l6,[%i1+6]
  579. srl %o2,24,%l0
  580. stb %o1,[%i1+7]
  581. srl %o2,16,%l1
  582. stb %l0,[%i1+8]
  583. srl %o2,8,%l2
  584. stb %l1,[%i1+9]
  585. stb %l2,[%i1+10]
  586. srl %o3,24,%l4
  587. stb %o2,[%i1+11]
  588. srl %o3,16,%l5
  589. stb %l4,[%i1+12]
  590. srl %o3,8,%l6
  591. stb %l5,[%i1+13]
  592. stb %l6,[%i1+14]
  593. stb %o3,[%i1+15]
  594. ret
  595. restore
  596. .type AES_encrypt,#function
  597. .size AES_encrypt,(.-AES_encrypt)
  598. ___
  # Opens the 256-byte-aligned AES_Td label; the table contents are
  # appended by the statements that follow this heredoc.
  599. $code.=<<___;
  600. .align 256
  601. AES_Td:
  602. ___
  # Word part of the AES_Td table: 256 32-bit constants.  As with AES_Te,
  # _data_word emits every constant twice, giving 8-byte entries and a
  # 2048-byte run that the decrypt code indexes with (byte << 3) & 2040
  # and reads with ldx.
  603. &_data_word(
  604. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  605. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  606. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  607. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  608. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  609. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  610. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  611. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  612. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  613. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  614. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  615. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  616. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  617. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  618. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  619. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  620. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  621. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  622. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  623. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  624. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  625. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  626. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  627. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  628. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  629. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  630. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  631. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  632. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  633. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  634. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  635. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  636. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  637. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  638. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  639. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  640. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  641. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  642. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  643. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  644. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  645. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  646. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  647. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  648. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  649. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  650. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  651. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  652. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  653. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  654. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  655. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  656. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  657. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  658. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  659. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  660. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  661. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  662. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  663. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  664. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  665. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  666. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  667. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  668. $code.=<<___;
  669. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  670. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  671. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  672. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  673. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  674. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  675. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  676. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  677. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  678. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  679. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  680. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  681. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  682. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  683. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  684. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  685. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  686. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  687. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  688. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  689. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  690. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  691. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  692. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  693. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  694. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  695. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  696. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  697. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  698. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  699. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  700. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  701. .type AES_Td,#object
  702. .size AES_Td,(.-AES_Td)
  703. .align 64
  704. .skip 16
  705. _sparcv9_AES_decrypt:
  706. save %sp,-$frame-$locals,%sp
  707. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  708. ld [$key+240],$rounds
  709. ld [$key+0],$t0
  710. ld [$key+4],$t1 !
  711. ld [$key+8],$t2
  712. ld [$key+12],$t3
  713. srl $rounds,1,$rounds
  714. xor $t0,$s0,$s0
  715. ld [$key+16],$t0
  716. xor $t1,$s1,$s1
  717. ld [$key+20],$t1
  718. srl $s0,21,$acc0 !
  719. xor $t2,$s2,$s2
  720. ld [$key+24],$t2
  721. xor $t3,$s3,$s3
  722. and $acc0,2040,$acc0
  723. ld [$key+28],$t3
  724. srl $s3,13,$acc1
  725. nop
  726. .Ldec_loop:
  727. srl $s2,5,$acc2 !
  728. and $acc1,2040,$acc1
  729. ldx [$tbl+$acc0],$acc0
  730. sll $s1,3,$acc3
  731. and $acc2,2040,$acc2
  732. ldx [$tbl+$acc1],$acc1
  733. srl $s1,21,$acc4
  734. and $acc3,2040,$acc3
  735. ldx [$tbl+$acc2],$acc2 !
  736. srl $s0,13,$acc5
  737. and $acc4,2040,$acc4
  738. ldx [$tbl+$acc3],$acc3
  739. srl $s3,5,$acc6
  740. and $acc5,2040,$acc5
  741. ldx [$tbl+$acc4],$acc4
  742. fmovs %f0,%f0
  743. sll $s2,3,$acc7 !
  744. and $acc6,2040,$acc6
  745. ldx [$tbl+$acc5],$acc5
  746. srl $s2,21,$acc8
  747. and $acc7,2040,$acc7
  748. ldx [$tbl+$acc6],$acc6
  749. srl $s1,13,$acc9
  750. and $acc8,2040,$acc8
  751. ldx [$tbl+$acc7],$acc7 !
  752. srl $s0,5,$acc10
  753. and $acc9,2040,$acc9
  754. ldx [$tbl+$acc8],$acc8
  755. sll $s3,3,$acc11
  756. and $acc10,2040,$acc10
  757. ldx [$tbl+$acc9],$acc9
  758. fmovs %f0,%f0
  759. srl $s3,21,$acc12 !
  760. and $acc11,2040,$acc11
  761. ldx [$tbl+$acc10],$acc10
  762. srl $s2,13,$acc13
  763. and $acc12,2040,$acc12
  764. ldx [$tbl+$acc11],$acc11
  765. srl $s1,5,$acc14
  766. and $acc13,2040,$acc13
  767. ldx [$tbl+$acc12],$acc12 !
  768. sll $s0,3,$acc15
  769. and $acc14,2040,$acc14
  770. ldx [$tbl+$acc13],$acc13
  771. and $acc15,2040,$acc15
  772. add $key,32,$key
  773. ldx [$tbl+$acc14],$acc14
  774. fmovs %f0,%f0
  775. subcc $rounds,1,$rounds !
  776. ldx [$tbl+$acc15],$acc15
  777. bz,a,pn %icc,.Ldec_last
  778. add $tbl,2048,$rounds
  779. srlx $acc1,8,$acc1
  780. xor $acc0,$t0,$t0
  781. ld [$key+0],$s0
  782. fmovs %f0,%f0
  783. srlx $acc2,16,$acc2 !
  784. xor $acc1,$t0,$t0
  785. ld [$key+4],$s1
  786. srlx $acc3,24,$acc3
  787. xor $acc2,$t0,$t0
  788. ld [$key+8],$s2
  789. srlx $acc5,8,$acc5
  790. xor $acc3,$t0,$t0
  791. ld [$key+12],$s3 !
  792. srlx $acc6,16,$acc6
  793. xor $acc4,$t1,$t1
  794. fmovs %f0,%f0
  795. srlx $acc7,24,$acc7
  796. xor $acc5,$t1,$t1
  797. srlx $acc9,8,$acc9
  798. xor $acc6,$t1,$t1
  799. srlx $acc10,16,$acc10 !
  800. xor $acc7,$t1,$t1
  801. srlx $acc11,24,$acc11
  802. xor $acc8,$t2,$t2
  803. srlx $acc13,8,$acc13
  804. xor $acc9,$t2,$t2
  805. srlx $acc14,16,$acc14
  806. xor $acc10,$t2,$t2
  807. srlx $acc15,24,$acc15 !
  808. xor $acc11,$t2,$t2
  809. xor $acc12,$acc14,$acc14
  810. xor $acc13,$t3,$t3
  811. srl $t0,21,$acc0
  812. xor $acc14,$t3,$t3
  813. xor $acc15,$t3,$t3
  814. srl $t3,13,$acc1
  815. and $acc0,2040,$acc0 !
  816. srl $t2,5,$acc2
  817. and $acc1,2040,$acc1
  818. ldx [$tbl+$acc0],$acc0
  819. sll $t1,3,$acc3
  820. and $acc2,2040,$acc2
  821. ldx [$tbl+$acc1],$acc1
  822. fmovs %f0,%f0
  823. srl $t1,21,$acc4 !
  824. and $acc3,2040,$acc3
  825. ldx [$tbl+$acc2],$acc2
  826. srl $t0,13,$acc5
  827. and $acc4,2040,$acc4
  828. ldx [$tbl+$acc3],$acc3
  829. srl $t3,5,$acc6
  830. and $acc5,2040,$acc5
  831. ldx [$tbl+$acc4],$acc4 !
  832. sll $t2,3,$acc7
  833. and $acc6,2040,$acc6
  834. ldx [$tbl+$acc5],$acc5
  835. srl $t2,21,$acc8
  836. and $acc7,2040,$acc7
  837. ldx [$tbl+$acc6],$acc6
  838. fmovs %f0,%f0
  839. srl $t1,13,$acc9 !
  840. and $acc8,2040,$acc8
  841. ldx [$tbl+$acc7],$acc7
  842. srl $t0,5,$acc10
  843. and $acc9,2040,$acc9
  844. ldx [$tbl+$acc8],$acc8
  845. sll $t3,3,$acc11
  846. and $acc10,2040,$acc10
  847. ldx [$tbl+$acc9],$acc9 !
  848. srl $t3,21,$acc12
  849. and $acc11,2040,$acc11
  850. ldx [$tbl+$acc10],$acc10
  851. srl $t2,13,$acc13
  852. and $acc12,2040,$acc12
  853. ldx [$tbl+$acc11],$acc11
  854. fmovs %f0,%f0
  855. srl $t1,5,$acc14 !
  856. and $acc13,2040,$acc13
  857. ldx [$tbl+$acc12],$acc12
  858. sll $t0,3,$acc15
  859. and $acc14,2040,$acc14
  860. ldx [$tbl+$acc13],$acc13
  861. srlx $acc1,8,$acc1
  862. and $acc15,2040,$acc15
  863. ldx [$tbl+$acc14],$acc14 !
  864. srlx $acc2,16,$acc2
  865. xor $acc0,$s0,$s0
  866. ldx [$tbl+$acc15],$acc15
  867. srlx $acc3,24,$acc3
  868. xor $acc1,$s0,$s0
  869. ld [$key+16],$t0
  870. fmovs %f0,%f0
  871. srlx $acc5,8,$acc5 !
  872. xor $acc2,$s0,$s0
  873. ld [$key+20],$t1
  874. srlx $acc6,16,$acc6
  875. xor $acc3,$s0,$s0
  876. ld [$key+24],$t2
  877. srlx $acc7,24,$acc7
  878. xor $acc4,$s1,$s1
  879. ld [$key+28],$t3 !
  880. srlx $acc9,8,$acc9
  881. xor $acc5,$s1,$s1
  882. ldx [$tbl+2048+0],%g0 ! prefetch td4
  883. srlx $acc10,16,$acc10
  884. xor $acc6,$s1,$s1
  885. ldx [$tbl+2048+32],%g0 ! prefetch td4
  886. srlx $acc11,24,$acc11
  887. xor $acc7,$s1,$s1
  888. ldx [$tbl+2048+64],%g0 ! prefetch td4
  889. srlx $acc13,8,$acc13
  890. xor $acc8,$s2,$s2
  891. ldx [$tbl+2048+96],%g0 ! prefetch td4
  892. srlx $acc14,16,$acc14 !
  893. xor $acc9,$s2,$s2
  894. ldx [$tbl+2048+128],%g0 ! prefetch td4
  895. srlx $acc15,24,$acc15
  896. xor $acc10,$s2,$s2
  897. ldx [$tbl+2048+160],%g0 ! prefetch td4
  898. srl $s0,21,$acc0
  899. xor $acc11,$s2,$s2
  900. ldx [$tbl+2048+192],%g0 ! prefetch td4
  901. xor $acc12,$acc14,$acc14
  902. xor $acc13,$s3,$s3
  903. ldx [$tbl+2048+224],%g0 ! prefetch td4
  904. and $acc0,2040,$acc0 !
  905. xor $acc14,$s3,$s3
  906. xor $acc15,$s3,$s3
  907. ba .Ldec_loop
  908. srl $s3,13,$acc1
  909. .align 32
  910. .Ldec_last:
  911. srlx $acc1,8,$acc1 !
  912. xor $acc0,$t0,$t0
  913. ld [$key+0],$s0
  914. srlx $acc2,16,$acc2
  915. xor $acc1,$t0,$t0
  916. ld [$key+4],$s1
  917. srlx $acc3,24,$acc3
  918. xor $acc2,$t0,$t0
  919. ld [$key+8],$s2 !
  920. srlx $acc5,8,$acc5
  921. xor $acc3,$t0,$t0
  922. ld [$key+12],$s3
  923. srlx $acc6,16,$acc6
  924. xor $acc4,$t1,$t1
  925. srlx $acc7,24,$acc7
  926. xor $acc5,$t1,$t1
  927. srlx $acc9,8,$acc9 !
  928. xor $acc6,$t1,$t1
  929. srlx $acc10,16,$acc10
  930. xor $acc7,$t1,$t1
  931. srlx $acc11,24,$acc11
  932. xor $acc8,$t2,$t2
  933. srlx $acc13,8,$acc13
  934. xor $acc9,$t2,$t2
  935. srlx $acc14,16,$acc14 !
  936. xor $acc10,$t2,$t2
  937. srlx $acc15,24,$acc15
  938. xor $acc11,$t2,$t2
  939. xor $acc12,$acc14,$acc14
  940. xor $acc13,$t3,$t3
  941. srl $t0,24,$acc0
  942. xor $acc14,$t3,$t3
  943. xor $acc15,$t3,$t3 !
  944. srl $t3,16,$acc1
  945. srl $t2,8,$acc2
  946. and $acc1,255,$acc1
  947. ldub [$rounds+$acc0],$acc0
  948. srl $t1,24,$acc4
  949. and $acc2,255,$acc2
  950. ldub [$rounds+$acc1],$acc1
  951. srl $t0,16,$acc5 !
  952. and $t1,255,$acc3
  953. ldub [$rounds+$acc2],$acc2
  954. ldub [$rounds+$acc3],$acc3
  955. srl $t3,8,$acc6
  956. and $acc5,255,$acc5
  957. ldub [$rounds+$acc4],$acc4
  958. fmovs %f0,%f0
  959. srl $t2,24,$acc8 !
  960. and $acc6,255,$acc6
  961. ldub [$rounds+$acc5],$acc5
  962. srl $t1,16,$acc9
  963. and $t2,255,$acc7
  964. ldub [$rounds+$acc6],$acc6
  965. ldub [$rounds+$acc7],$acc7
  966. fmovs %f0,%f0
  967. srl $t0,8,$acc10 !
  968. and $acc9,255,$acc9
  969. ldub [$rounds+$acc8],$acc8
  970. srl $t3,24,$acc12
  971. and $acc10,255,$acc10
  972. ldub [$rounds+$acc9],$acc9
  973. srl $t2,16,$acc13
  974. and $t3,255,$acc11
  975. ldub [$rounds+$acc10],$acc10 !
  976. srl $t1,8,$acc14
  977. and $acc13,255,$acc13
  978. ldub [$rounds+$acc11],$acc11
  979. ldub [$rounds+$acc12],$acc12
  980. and $acc14,255,$acc14
  981. ldub [$rounds+$acc13],$acc13
  982. and $t0,255,$acc15
  983. ldub [$rounds+$acc14],$acc14 !
  984. sll $acc0,24,$acc0
  985. xor $acc3,$s0,$s0
  986. ldub [$rounds+$acc15],$acc15
  987. sll $acc1,16,$acc1
  988. xor $acc0,$s0,$s0
  989. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  990. fmovs %f0,%f0
  991. sll $acc2,8,$acc2 !
  992. xor $acc1,$s0,$s0
  993. sll $acc4,24,$acc4
  994. xor $acc2,$s0,$s0
  995. sll $acc5,16,$acc5
  996. xor $acc7,$s1,$s1
  997. sll $acc6,8,$acc6
  998. xor $acc4,$s1,$s1
  999. sll $acc8,24,$acc8 !
  1000. xor $acc5,$s1,$s1
  1001. sll $acc9,16,$acc9
  1002. xor $acc11,$s2,$s2
  1003. sll $acc10,8,$acc10
  1004. xor $acc6,$s1,$s1
  1005. sll $acc12,24,$acc12
  1006. xor $acc8,$s2,$s2
  1007. sll $acc13,16,$acc13 !
  1008. xor $acc9,$s2,$s2
  1009. sll $acc14,8,$acc14
  1010. xor $acc10,$s2,$s2
  1011. xor $acc12,$acc14,$acc14
  1012. xor $acc13,$s3,$s3
  1013. xor $acc14,$s3,$s3
  1014. xor $acc15,$s3,$s3
  1015. ret
  1016. restore
  1017. .type _sparcv9_AES_decrypt,#function
  1018. .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
  ! AES_decrypt: exported entry point that processes one 16-byte block by
  ! calling _sparcv9_AES_decrypt.
  ! Register use after the save below:
  !   %i0 - first argument, address the 16 input bytes are read from
  !   %i1 - second argument, address the 16 output bytes are written to
  !   %i2 - third argument, passed on unchanged in %o5
  !         NOTE(review): presumably the key schedule - confirm against the
  !         C prototype and the register map of _sparcv9_AES_decrypt.
  ! If both pointers are 4-byte aligned the block is moved with word loads
  ! and stores. Otherwise control goes to .Lunaligned_dec, which moves the
  ! same data one byte at a time.
  1019. .align 32
  1020. .globl AES_decrypt
  1021. AES_decrypt:
  1022. or %o0,%o1,%g1  ! merge both pointers so one test covers their low bits
  1023. andcc %g1,3,%g0  ! nonzero when either pointer is not 4-byte aligned
  1024. bnz,pn %xcc,.Lunaligned_dec  ! hinted not-taken: misaligned buffers use the byte path
  1025. save %sp,-$frame,%sp  ! delay slot, executed on both paths: new register window and frame
  ! Aligned path: fetch the block as four 32-bit words.
  1026. ld [%i0+0],%o0
  1027. ld [%i0+4],%o1
  1028. ld [%i0+8],%o2
  1029. ld [%i0+12],%o3
  ! Position-independent address of AES_Td: the call targets the instruction
  ! right after its delay slot and leaves its own address (label 1) in %o7.
  1030. 1: call .+8
  1031. add %o7,AES_Td-1b,%o4  ! delay slot: %o4 = address of AES_Td
  1032. call _sparcv9_AES_decrypt
  1033. mov %i2,%o5  ! delay slot: forward the third argument
  ! The four result words are read back from %o0-%o3.
  1034. st %o0,[%i1+0]
  1035. st %o1,[%i1+4]
  1036. st %o2,[%i1+8]
  1037. st %o3,[%i1+12]
  1038. ret
  1039. restore  ! delay slot: release the register window
  ! Byte-wise path. Each 32-bit word is assembled from four byte loads with
  ! the lowest-addressed byte placed in bits 31-24. The loads, shifts and ors
  ! of neighbouring words are interleaved, and %l0-%l7 are reused for the
  ! second half of the block.
  1040. .align 32
  1041. .Lunaligned_dec:
  1042. ldub [%i0+0],%l0
  1043. ldub [%i0+1],%l1
  1044. ldub [%i0+2],%l2
  1045. sll %l0,24,%l0
  1046. ldub [%i0+3],%l3
  1047. sll %l1,16,%l1
  1048. ldub [%i0+4],%l4
  1049. sll %l2,8,%l2
  1050. or %l1,%l0,%l0
  1051. ldub [%i0+5],%l5
  1052. sll %l4,24,%l4
  1053. or %l3,%l2,%l2
  1054. ldub [%i0+6],%l6
  1055. sll %l5,16,%l5
  1056. or %l0,%l2,%o0  ! %o0 = input bytes 0-3
  1057. ldub [%i0+7],%l7
  1058. sll %l6,8,%l6
  1059. or %l5,%l4,%l4
  1060. ldub [%i0+8],%l0
  1061. or %l7,%l6,%l6
  1062. ldub [%i0+9],%l1
  1063. or %l4,%l6,%o1  ! %o1 = input bytes 4-7
  1064. ldub [%i0+10],%l2
  1065. sll %l0,24,%l0
  1066. ldub [%i0+11],%l3
  1067. sll %l1,16,%l1
  1068. ldub [%i0+12],%l4
  1069. sll %l2,8,%l2
  1070. or %l1,%l0,%l0
  1071. ldub [%i0+13],%l5
  1072. sll %l4,24,%l4
  1073. or %l3,%l2,%l2
  1074. ldub [%i0+14],%l6
  1075. sll %l5,16,%l5
  1076. or %l0,%l2,%o2  ! %o2 = input bytes 8-11
  1077. ldub [%i0+15],%l7
  1078. sll %l6,8,%l6
  1079. or %l5,%l4,%l4
  1080. or %l7,%l6,%l6
  1081. or %l4,%l6,%o3  ! %o3 = input bytes 12-15
  ! Same PC-relative table address computation and call as the aligned path.
  1082. 1: call .+8
  1083. add %o7,AES_Td-1b,%o4  ! delay slot: %o4 = address of AES_Td
  1084. call _sparcv9_AES_decrypt
  1085. mov %i2,%o5  ! delay slot: forward the third argument
  ! Store the result one byte at a time, most significant byte of each word
  ! first. The low byte is stored straight from the word register.
  1086. srl %o0,24,%l0
  1087. srl %o0,16,%l1
  1088. stb %l0,[%i1+0]
  1089. srl %o0,8,%l2
  1090. stb %l1,[%i1+1]
  1091. stb %l2,[%i1+2]
  1092. srl %o1,24,%l4
  1093. stb %o0,[%i1+3]
  1094. srl %o1,16,%l5
  1095. stb %l4,[%i1+4]
  1096. srl %o1,8,%l6
  1097. stb %l5,[%i1+5]
  1098. stb %l6,[%i1+6]
  1099. srl %o2,24,%l0
  1100. stb %o1,[%i1+7]
  1101. srl %o2,16,%l1
  1102. stb %l0,[%i1+8]
  1103. srl %o2,8,%l2
  1104. stb %l1,[%i1+9]
  1105. stb %l2,[%i1+10]
  1106. srl %o3,24,%l4
  1107. stb %o2,[%i1+11]
  1108. srl %o3,16,%l5
  1109. stb %l4,[%i1+12]
  1110. srl %o3,8,%l6
  1111. stb %l5,[%i1+13]
  1112. stb %l6,[%i1+14]
  1113. stb %o3,[%i1+15]
  1114. ret
  1115. restore  ! delay slot: release the register window
  1116. .type AES_decrypt,#function
  1117. .size AES_decrypt,(.-AES_decrypt)
  1118. ___
  # The fmovs instructions in the generated code stand in for FP nops.  They
  # were originally added to meet specific instruction alignment requirements
  # and so maximize ILP.  UltraSPARC T1 (a.k.a. Niagara) has a shared FPU,
  # where FP nops can have an undesired effect, so they are simply omitted
  # here at the cost of a fraction of a percent in performance.
  #
  # The substitution edits $code in place and removes everything from the
  # mnemonic up to the end of its line; the now-empty line itself remains.
  s/fmovs.*$//mg for $code;

  print $code;

  # Closing STDOUT flushes buffered output; a failure here is fatal.
  unless (close STDOUT) {
      die "error closing STDOUT: $!";
  }