2
0

aes-sparcv9.pl 30 KB


  1. #! /usr/bin/env perl
  2. # Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the OpenSSL license (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. Rights for redistribution and usage in source and binary
  12. # forms are granted according to the OpenSSL license.
  13. # ====================================================================
  14. #
  15. # Version 1.1
  16. #
  17. # The major reason for undertaking this effort was to mitigate the hazard
  18. # of cache-timing attacks. This is [currently and initially!] addressed in
  19. # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
  20. # 2. References to them are scheduled for L2 cache latency, meaning
  21. # that the tables don't have to reside in L1 cache. Once again, this
  22. # is an initial draft and one should expect more countermeasures to
  23. # be implemented...
  24. #
  25. # Version 1.1 prefetches T[ed]4 in order to mitigate attacks on the last
  26. # round.
  27. #
  28. # Even though performance was not the primary goal [on the contrary,
  29. # extra shifts "induced" by compressed S-box and longer loop epilogue
  30. # "induced" by scheduling for L2 have negative effect on performance],
  31. # the code turned out to run in ~23 cycles per processed byte en-/
  32. # decrypted with a 128-bit key. This is a pretty good result for code
  33. # with the mentioned qualities on an UltraSPARC core. Compared to Sun C
  34. # generated code my encrypt procedure runs just a few percent faster,
  35. # while the decrypt one is a whole 50% faster [yes, Sun C failed to
  36. # generate an optimal decrypt procedure]. Compared to GNU C generated
  37. # code both procedures are more than 60% faster:-)
  38. $output = pop; # output file name: the last command-line argument, undef when none was given
  39. if (defined $output) { open STDOUT,'>',$output or die "can't open $output: $!"; } # redirect the generated code only when a file was named; 3-arg open, failure is fatal instead of silently losing all output
  40. $frame="STACK_FRAME"; # symbolic frame size pasted verbatim into the assembly; presumably a sparc_arch.h macro -- confirm
  41. $bias="STACK_BIAS"; # symbolic stack bias, likewise resolved outside this script -- presumably by sparc_arch.h
  42. $locals=16; # extra bytes reserved by "save" in the _sparcv9_AES_* routines; offset 0 keeps the off-loaded %i7
  43. $acc0="%l0"; # $acc0..$acc15: the 16 look-ups of one round -- first the table index, then the loaded value
  44. $acc1="%o0";
  45. $acc2="%o1";
  46. $acc3="%o2";
  47. $acc4="%l1";
  48. $acc5="%o3";
  49. $acc6="%o4";
  50. $acc7="%o5";
  51. $acc8="%l2";
  52. $acc9="%o7";
  53. $acc10="%g1";
  54. $acc11="%g2";
  55. $acc12="%l3";
  56. $acc13="%g3";
  57. $acc14="%g4";
  58. $acc15="%g5";
  59. $t0="%l4"; # $t0..$t3: loaded with round-key words, then the look-ups are XORed into them
  60. $t1="%l5";
  61. $t2="%l6";
  62. $t3="%l7";
  63. $s0="%i0"; # $s0..$s3: the four 32-bit state words; input block on entry, result on return
  64. $s1="%i1";
  65. $s2="%i2";
  66. $s3="%i3";
  67. $tbl="%i4"; # base of the look-up table (AES_encrypt passes AES_Te); the byte table is addressed at +2048
  68. $key="%i5"; # key schedule pointer; the round count is loaded from offset 240
  69. $rounds="%i7"; # aliases with return address, which is off-loaded to stack
  70. sub _data_word() # append every argument to $code as one table entry; callers use &_data_word(...), which bypasses this empty prototype
  71. { my $i; # current table word
  72. while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } # each word is emitted twice (two .long values per entry); the loop stops at the first undef argument
  73. }
  74. $code.=<<___; # assembly prologue: include sparc_arch.h, declare %g2/%g3 as scratch under __arch64__, open .text and start the 256-byte aligned AES_Te table
  75. #include "sparc_arch.h"
  76. #ifdef __arch64__
  77. .register %g2,#scratch
  78. .register %g3,#scratch
  79. #endif
  80. .section ".text",#alloc,#execinstr
  81. .align 256
  82. AES_Te:
  83. ___
  84. &_data_word( # AES_Te: 256 words, each emitted twice by _data_word, i.e. the 2048 bytes that precede the byte table
  85. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  86. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  87. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  88. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  89. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  90. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  91. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  92. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  93. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  94. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  95. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  96. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  97. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  98. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  99. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  100. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  101. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  102. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  103. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  104. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  105. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  106. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  107. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  108. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  109. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  110. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  111. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  112. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  113. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  114. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  115. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  116. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  117. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  118. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  119. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  120. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  121. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  122. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  123. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  124. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  125. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  126. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  127. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  128. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  129. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  130. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  131. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  132. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  133. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  134. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  135. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  136. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  137. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  138. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  139. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  140. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  141. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  142. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  143. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  144. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  145. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  146. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  147. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  148. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  149. $code.=<<___; # 256-entry byte table that closes AES_Te (used by the last round at table base + 2048), then _sparcv9_AES_encrypt and the public AES_encrypt wrapper
  150. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  151. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  152. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  153. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  154. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  155. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  156. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  157. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  158. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  159. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  160. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  161. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  162. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  163. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  164. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  165. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  166. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  167. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  168. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  169. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  170. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  171. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  172. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  173. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  174. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  175. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  176. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  177. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  178. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  179. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  180. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  181. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  182. .type AES_Te,#object
  183. .size AES_Te,(.-AES_Te)
  184. .align 64
  185. .skip 16
  186. _sparcv9_AES_encrypt:
  187. save %sp,-$frame-$locals,%sp
  188. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  189. ld [$key+240],$rounds
  190. ld [$key+0],$t0
  191. ld [$key+4],$t1 !
  192. ld [$key+8],$t2
  193. srl $rounds,1,$rounds
  194. xor $t0,$s0,$s0
  195. ld [$key+12],$t3
  196. srl $s0,21,$acc0
  197. xor $t1,$s1,$s1
  198. ld [$key+16],$t0
  199. srl $s1,13,$acc1 !
  200. xor $t2,$s2,$s2
  201. ld [$key+20],$t1
  202. xor $t3,$s3,$s3
  203. ld [$key+24],$t2
  204. and $acc0,2040,$acc0
  205. ld [$key+28],$t3
  206. nop
  207. .Lenc_loop:
  208. srl $s2,5,$acc2 !
  209. and $acc1,2040,$acc1
  210. ldx [$tbl+$acc0],$acc0
  211. sll $s3,3,$acc3
  212. and $acc2,2040,$acc2
  213. ldx [$tbl+$acc1],$acc1
  214. srl $s1,21,$acc4
  215. and $acc3,2040,$acc3
  216. ldx [$tbl+$acc2],$acc2 !
  217. srl $s2,13,$acc5
  218. and $acc4,2040,$acc4
  219. ldx [$tbl+$acc3],$acc3
  220. srl $s3,5,$acc6
  221. and $acc5,2040,$acc5
  222. ldx [$tbl+$acc4],$acc4
  223. fmovs %f0,%f0
  224. sll $s0,3,$acc7 !
  225. and $acc6,2040,$acc6
  226. ldx [$tbl+$acc5],$acc5
  227. srl $s2,21,$acc8
  228. and $acc7,2040,$acc7
  229. ldx [$tbl+$acc6],$acc6
  230. srl $s3,13,$acc9
  231. and $acc8,2040,$acc8
  232. ldx [$tbl+$acc7],$acc7 !
  233. srl $s0,5,$acc10
  234. and $acc9,2040,$acc9
  235. ldx [$tbl+$acc8],$acc8
  236. sll $s1,3,$acc11
  237. and $acc10,2040,$acc10
  238. ldx [$tbl+$acc9],$acc9
  239. fmovs %f0,%f0
  240. srl $s3,21,$acc12 !
  241. and $acc11,2040,$acc11
  242. ldx [$tbl+$acc10],$acc10
  243. srl $s0,13,$acc13
  244. and $acc12,2040,$acc12
  245. ldx [$tbl+$acc11],$acc11
  246. srl $s1,5,$acc14
  247. and $acc13,2040,$acc13
  248. ldx [$tbl+$acc12],$acc12 !
  249. sll $s2,3,$acc15
  250. and $acc14,2040,$acc14
  251. ldx [$tbl+$acc13],$acc13
  252. and $acc15,2040,$acc15
  253. add $key,32,$key
  254. ldx [$tbl+$acc14],$acc14
  255. fmovs %f0,%f0
  256. subcc $rounds,1,$rounds !
  257. ldx [$tbl+$acc15],$acc15
  258. bz,a,pn %icc,.Lenc_last
  259. add $tbl,2048,$rounds
  260. srlx $acc1,8,$acc1
  261. xor $acc0,$t0,$t0
  262. ld [$key+0],$s0
  263. fmovs %f0,%f0
  264. srlx $acc2,16,$acc2 !
  265. xor $acc1,$t0,$t0
  266. ld [$key+4],$s1
  267. srlx $acc3,24,$acc3
  268. xor $acc2,$t0,$t0
  269. ld [$key+8],$s2
  270. srlx $acc5,8,$acc5
  271. xor $acc3,$t0,$t0
  272. ld [$key+12],$s3 !
  273. srlx $acc6,16,$acc6
  274. xor $acc4,$t1,$t1
  275. fmovs %f0,%f0
  276. srlx $acc7,24,$acc7
  277. xor $acc5,$t1,$t1
  278. srlx $acc9,8,$acc9
  279. xor $acc6,$t1,$t1
  280. srlx $acc10,16,$acc10 !
  281. xor $acc7,$t1,$t1
  282. srlx $acc11,24,$acc11
  283. xor $acc8,$t2,$t2
  284. srlx $acc13,8,$acc13
  285. xor $acc9,$t2,$t2
  286. srlx $acc14,16,$acc14
  287. xor $acc10,$t2,$t2
  288. srlx $acc15,24,$acc15 !
  289. xor $acc11,$t2,$t2
  290. xor $acc12,$acc14,$acc14
  291. xor $acc13,$t3,$t3
  292. srl $t0,21,$acc0
  293. xor $acc14,$t3,$t3
  294. srl $t1,13,$acc1
  295. xor $acc15,$t3,$t3
  296. and $acc0,2040,$acc0 !
  297. srl $t2,5,$acc2
  298. and $acc1,2040,$acc1
  299. ldx [$tbl+$acc0],$acc0
  300. sll $t3,3,$acc3
  301. and $acc2,2040,$acc2
  302. ldx [$tbl+$acc1],$acc1
  303. fmovs %f0,%f0
  304. srl $t1,21,$acc4 !
  305. and $acc3,2040,$acc3
  306. ldx [$tbl+$acc2],$acc2
  307. srl $t2,13,$acc5
  308. and $acc4,2040,$acc4
  309. ldx [$tbl+$acc3],$acc3
  310. srl $t3,5,$acc6
  311. and $acc5,2040,$acc5
  312. ldx [$tbl+$acc4],$acc4 !
  313. sll $t0,3,$acc7
  314. and $acc6,2040,$acc6
  315. ldx [$tbl+$acc5],$acc5
  316. srl $t2,21,$acc8
  317. and $acc7,2040,$acc7
  318. ldx [$tbl+$acc6],$acc6
  319. fmovs %f0,%f0
  320. srl $t3,13,$acc9 !
  321. and $acc8,2040,$acc8
  322. ldx [$tbl+$acc7],$acc7
  323. srl $t0,5,$acc10
  324. and $acc9,2040,$acc9
  325. ldx [$tbl+$acc8],$acc8
  326. sll $t1,3,$acc11
  327. and $acc10,2040,$acc10
  328. ldx [$tbl+$acc9],$acc9 !
  329. srl $t3,21,$acc12
  330. and $acc11,2040,$acc11
  331. ldx [$tbl+$acc10],$acc10
  332. srl $t0,13,$acc13
  333. and $acc12,2040,$acc12
  334. ldx [$tbl+$acc11],$acc11
  335. fmovs %f0,%f0
  336. srl $t1,5,$acc14 !
  337. and $acc13,2040,$acc13
  338. ldx [$tbl+$acc12],$acc12
  339. sll $t2,3,$acc15
  340. and $acc14,2040,$acc14
  341. ldx [$tbl+$acc13],$acc13
  342. srlx $acc1,8,$acc1
  343. and $acc15,2040,$acc15
  344. ldx [$tbl+$acc14],$acc14 !
  345. srlx $acc2,16,$acc2
  346. xor $acc0,$s0,$s0
  347. ldx [$tbl+$acc15],$acc15
  348. srlx $acc3,24,$acc3
  349. xor $acc1,$s0,$s0
  350. ld [$key+16],$t0
  351. fmovs %f0,%f0
  352. srlx $acc5,8,$acc5 !
  353. xor $acc2,$s0,$s0
  354. ld [$key+20],$t1
  355. srlx $acc6,16,$acc6
  356. xor $acc3,$s0,$s0
  357. ld [$key+24],$t2
  358. srlx $acc7,24,$acc7
  359. xor $acc4,$s1,$s1
  360. ld [$key+28],$t3 !
  361. srlx $acc9,8,$acc9
  362. xor $acc5,$s1,$s1
  363. ldx [$tbl+2048+0],%g0 ! prefetch te4
  364. srlx $acc10,16,$acc10
  365. xor $acc6,$s1,$s1
  366. ldx [$tbl+2048+32],%g0 ! prefetch te4
  367. srlx $acc11,24,$acc11
  368. xor $acc7,$s1,$s1
  369. ldx [$tbl+2048+64],%g0 ! prefetch te4
  370. srlx $acc13,8,$acc13
  371. xor $acc8,$s2,$s2
  372. ldx [$tbl+2048+96],%g0 ! prefetch te4
  373. srlx $acc14,16,$acc14 !
  374. xor $acc9,$s2,$s2
  375. ldx [$tbl+2048+128],%g0 ! prefetch te4
  376. srlx $acc15,24,$acc15
  377. xor $acc10,$s2,$s2
  378. ldx [$tbl+2048+160],%g0 ! prefetch te4
  379. srl $s0,21,$acc0
  380. xor $acc11,$s2,$s2
  381. ldx [$tbl+2048+192],%g0 ! prefetch te4
  382. xor $acc12,$acc14,$acc14
  383. xor $acc13,$s3,$s3
  384. ldx [$tbl+2048+224],%g0 ! prefetch te4
  385. srl $s1,13,$acc1 !
  386. xor $acc14,$s3,$s3
  387. xor $acc15,$s3,$s3
  388. ba .Lenc_loop
  389. and $acc0,2040,$acc0
  390. .align 32
  391. .Lenc_last:
  392. srlx $acc1,8,$acc1 !
  393. xor $acc0,$t0,$t0
  394. ld [$key+0],$s0
  395. srlx $acc2,16,$acc2
  396. xor $acc1,$t0,$t0
  397. ld [$key+4],$s1
  398. srlx $acc3,24,$acc3
  399. xor $acc2,$t0,$t0
  400. ld [$key+8],$s2 !
  401. srlx $acc5,8,$acc5
  402. xor $acc3,$t0,$t0
  403. ld [$key+12],$s3
  404. srlx $acc6,16,$acc6
  405. xor $acc4,$t1,$t1
  406. srlx $acc7,24,$acc7
  407. xor $acc5,$t1,$t1
  408. srlx $acc9,8,$acc9 !
  409. xor $acc6,$t1,$t1
  410. srlx $acc10,16,$acc10
  411. xor $acc7,$t1,$t1
  412. srlx $acc11,24,$acc11
  413. xor $acc8,$t2,$t2
  414. srlx $acc13,8,$acc13
  415. xor $acc9,$t2,$t2
  416. srlx $acc14,16,$acc14 !
  417. xor $acc10,$t2,$t2
  418. srlx $acc15,24,$acc15
  419. xor $acc11,$t2,$t2
  420. xor $acc12,$acc14,$acc14
  421. xor $acc13,$t3,$t3
  422. srl $t0,24,$acc0
  423. xor $acc14,$t3,$t3
  424. srl $t1,16,$acc1 !
  425. xor $acc15,$t3,$t3
  426. srl $t2,8,$acc2
  427. and $acc1,255,$acc1
  428. ldub [$rounds+$acc0],$acc0
  429. srl $t1,24,$acc4
  430. and $acc2,255,$acc2
  431. ldub [$rounds+$acc1],$acc1
  432. srl $t2,16,$acc5 !
  433. and $t3,255,$acc3
  434. ldub [$rounds+$acc2],$acc2
  435. ldub [$rounds+$acc3],$acc3
  436. srl $t3,8,$acc6
  437. and $acc5,255,$acc5
  438. ldub [$rounds+$acc4],$acc4
  439. fmovs %f0,%f0
  440. srl $t2,24,$acc8 !
  441. and $acc6,255,$acc6
  442. ldub [$rounds+$acc5],$acc5
  443. srl $t3,16,$acc9
  444. and $t0,255,$acc7
  445. ldub [$rounds+$acc6],$acc6
  446. ldub [$rounds+$acc7],$acc7
  447. fmovs %f0,%f0
  448. srl $t0,8,$acc10 !
  449. and $acc9,255,$acc9
  450. ldub [$rounds+$acc8],$acc8
  451. srl $t3,24,$acc12
  452. and $acc10,255,$acc10
  453. ldub [$rounds+$acc9],$acc9
  454. srl $t0,16,$acc13
  455. and $t1,255,$acc11
  456. ldub [$rounds+$acc10],$acc10 !
  457. srl $t1,8,$acc14
  458. and $acc13,255,$acc13
  459. ldub [$rounds+$acc11],$acc11
  460. ldub [$rounds+$acc12],$acc12
  461. and $acc14,255,$acc14
  462. ldub [$rounds+$acc13],$acc13
  463. and $t2,255,$acc15
  464. ldub [$rounds+$acc14],$acc14 !
  465. sll $acc0,24,$acc0
  466. xor $acc3,$s0,$s0
  467. ldub [$rounds+$acc15],$acc15
  468. sll $acc1,16,$acc1
  469. xor $acc0,$s0,$s0
  470. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  471. fmovs %f0,%f0
  472. sll $acc2,8,$acc2 !
  473. xor $acc1,$s0,$s0
  474. sll $acc4,24,$acc4
  475. xor $acc2,$s0,$s0
  476. sll $acc5,16,$acc5
  477. xor $acc7,$s1,$s1
  478. sll $acc6,8,$acc6
  479. xor $acc4,$s1,$s1
  480. sll $acc8,24,$acc8 !
  481. xor $acc5,$s1,$s1
  482. sll $acc9,16,$acc9
  483. xor $acc11,$s2,$s2
  484. sll $acc10,8,$acc10
  485. xor $acc6,$s1,$s1
  486. sll $acc12,24,$acc12
  487. xor $acc8,$s2,$s2
  488. sll $acc13,16,$acc13 !
  489. xor $acc9,$s2,$s2
  490. sll $acc14,8,$acc14
  491. xor $acc10,$s2,$s2
  492. xor $acc12,$acc14,$acc14
  493. xor $acc13,$s3,$s3
  494. xor $acc14,$s3,$s3
  495. xor $acc15,$s3,$s3
  496. ret
  497. restore
  498. .type _sparcv9_AES_encrypt,#function
  499. .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
  500. .align 32
  501. .globl AES_encrypt
  502. AES_encrypt:
  503. or %o0,%o1,%g1
  504. andcc %g1,3,%g0
  505. bnz,pn %xcc,.Lunaligned_enc
  506. save %sp,-$frame,%sp
  507. ld [%i0+0],%o0
  508. ld [%i0+4],%o1
  509. ld [%i0+8],%o2
  510. ld [%i0+12],%o3
  511. 1: call .+8
  512. add %o7,AES_Te-1b,%o4
  513. call _sparcv9_AES_encrypt
  514. mov %i2,%o5
  515. st %o0,[%i1+0]
  516. st %o1,[%i1+4]
  517. st %o2,[%i1+8]
  518. st %o3,[%i1+12]
  519. ret
  520. restore
  521. .align 32
  522. .Lunaligned_enc:
  523. ldub [%i0+0],%l0
  524. ldub [%i0+1],%l1
  525. ldub [%i0+2],%l2
  526. sll %l0,24,%l0
  527. ldub [%i0+3],%l3
  528. sll %l1,16,%l1
  529. ldub [%i0+4],%l4
  530. sll %l2,8,%l2
  531. or %l1,%l0,%l0
  532. ldub [%i0+5],%l5
  533. sll %l4,24,%l4
  534. or %l3,%l2,%l2
  535. ldub [%i0+6],%l6
  536. sll %l5,16,%l5
  537. or %l0,%l2,%o0
  538. ldub [%i0+7],%l7
  539. sll %l6,8,%l6
  540. or %l5,%l4,%l4
  541. ldub [%i0+8],%l0
  542. or %l7,%l6,%l6
  543. ldub [%i0+9],%l1
  544. or %l4,%l6,%o1
  545. ldub [%i0+10],%l2
  546. sll %l0,24,%l0
  547. ldub [%i0+11],%l3
  548. sll %l1,16,%l1
  549. ldub [%i0+12],%l4
  550. sll %l2,8,%l2
  551. or %l1,%l0,%l0
  552. ldub [%i0+13],%l5
  553. sll %l4,24,%l4
  554. or %l3,%l2,%l2
  555. ldub [%i0+14],%l6
  556. sll %l5,16,%l5
  557. or %l0,%l2,%o2
  558. ldub [%i0+15],%l7
  559. sll %l6,8,%l6
  560. or %l5,%l4,%l4
  561. or %l7,%l6,%l6
  562. or %l4,%l6,%o3
  563. 1: call .+8
  564. add %o7,AES_Te-1b,%o4
  565. call _sparcv9_AES_encrypt
  566. mov %i2,%o5
  567. srl %o0,24,%l0
  568. srl %o0,16,%l1
  569. stb %l0,[%i1+0]
  570. srl %o0,8,%l2
  571. stb %l1,[%i1+1]
  572. stb %l2,[%i1+2]
  573. srl %o1,24,%l4
  574. stb %o0,[%i1+3]
  575. srl %o1,16,%l5
  576. stb %l4,[%i1+4]
  577. srl %o1,8,%l6
  578. stb %l5,[%i1+5]
  579. stb %l6,[%i1+6]
  580. srl %o2,24,%l0
  581. stb %o1,[%i1+7]
  582. srl %o2,16,%l1
  583. stb %l0,[%i1+8]
  584. srl %o2,8,%l2
  585. stb %l1,[%i1+9]
  586. stb %l2,[%i1+10]
  587. srl %o3,24,%l4
  588. stb %o2,[%i1+11]
  589. srl %o3,16,%l5
  590. stb %l4,[%i1+12]
  591. srl %o3,8,%l6
  592. stb %l5,[%i1+13]
  593. stb %l6,[%i1+14]
  594. stb %o3,[%i1+15]
  595. ret
  596. restore
  597. .type AES_encrypt,#function
  598. .size AES_encrypt,(.-AES_encrypt)
  599. ___
  600. $code.=<<___; # start the 256-byte aligned AES_Td table; its words are appended by the _data_word call that follows
  601. .align 256
  602. AES_Td:
  603. ___
  604. &_data_word( # AES_Td: 256 words, each emitted twice by _data_word, i.e. the 2048 bytes that precede the byte table
  605. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  606. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  607. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  608. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  609. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  610. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  611. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  612. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  613. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  614. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  615. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  616. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  617. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  618. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  619. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  620. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  621. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  622. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  623. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  624. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  625. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  626. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  627. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  628. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  629. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  630. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  631. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  632. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  633. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  634. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  635. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  636. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  637. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  638. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  639. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  640. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  641. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  642. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  643. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  644. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  645. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  646. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  647. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  648. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  649. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  650. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  651. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  652. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  653. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  654. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  655. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  656. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  657. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  658. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  659. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  660. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  661. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  662. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  663. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  664. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  665. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  666. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  667. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  668. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  669. $code.=<<___;
  670. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  671. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  672. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  673. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  674. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  675. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  676. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  677. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  678. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  679. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  680. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  681. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  682. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  683. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  684. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  685. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  686. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  687. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  688. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  689. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  690. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  691. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  692. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  693. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  694. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  695. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  696. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  697. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  698. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  699. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  700. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  701. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  702. .type AES_Td,#object
  703. .size AES_Td,(.-AES_Td)
  704. .align 64
  705. .skip 16
  706. _sparcv9_AES_decrypt:
  707. save %sp,-$frame-$locals,%sp
  708. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  709. ld [$key+240],$rounds
  710. ld [$key+0],$t0
  711. ld [$key+4],$t1 !
  712. ld [$key+8],$t2
  713. ld [$key+12],$t3
  714. srl $rounds,1,$rounds
  715. xor $t0,$s0,$s0
  716. ld [$key+16],$t0
  717. xor $t1,$s1,$s1
  718. ld [$key+20],$t1
  719. srl $s0,21,$acc0 !
  720. xor $t2,$s2,$s2
  721. ld [$key+24],$t2
  722. xor $t3,$s3,$s3
  723. and $acc0,2040,$acc0
  724. ld [$key+28],$t3
  725. srl $s3,13,$acc1
  726. nop
  727. .Ldec_loop:
  728. srl $s2,5,$acc2 !
  729. and $acc1,2040,$acc1
  730. ldx [$tbl+$acc0],$acc0
  731. sll $s1,3,$acc3
  732. and $acc2,2040,$acc2
  733. ldx [$tbl+$acc1],$acc1
  734. srl $s1,21,$acc4
  735. and $acc3,2040,$acc3
  736. ldx [$tbl+$acc2],$acc2 !
  737. srl $s0,13,$acc5
  738. and $acc4,2040,$acc4
  739. ldx [$tbl+$acc3],$acc3
  740. srl $s3,5,$acc6
  741. and $acc5,2040,$acc5
  742. ldx [$tbl+$acc4],$acc4
  743. fmovs %f0,%f0
  744. sll $s2,3,$acc7 !
  745. and $acc6,2040,$acc6
  746. ldx [$tbl+$acc5],$acc5
  747. srl $s2,21,$acc8
  748. and $acc7,2040,$acc7
  749. ldx [$tbl+$acc6],$acc6
  750. srl $s1,13,$acc9
  751. and $acc8,2040,$acc8
  752. ldx [$tbl+$acc7],$acc7 !
  753. srl $s0,5,$acc10
  754. and $acc9,2040,$acc9
  755. ldx [$tbl+$acc8],$acc8
  756. sll $s3,3,$acc11
  757. and $acc10,2040,$acc10
  758. ldx [$tbl+$acc9],$acc9
  759. fmovs %f0,%f0
  760. srl $s3,21,$acc12 !
  761. and $acc11,2040,$acc11
  762. ldx [$tbl+$acc10],$acc10
  763. srl $s2,13,$acc13
  764. and $acc12,2040,$acc12
  765. ldx [$tbl+$acc11],$acc11
  766. srl $s1,5,$acc14
  767. and $acc13,2040,$acc13
  768. ldx [$tbl+$acc12],$acc12 !
  769. sll $s0,3,$acc15
  770. and $acc14,2040,$acc14
  771. ldx [$tbl+$acc13],$acc13
  772. and $acc15,2040,$acc15
  773. add $key,32,$key
  774. ldx [$tbl+$acc14],$acc14
  775. fmovs %f0,%f0
  776. subcc $rounds,1,$rounds !
  777. ldx [$tbl+$acc15],$acc15
  778. bz,a,pn %icc,.Ldec_last
  779. add $tbl,2048,$rounds
  780. srlx $acc1,8,$acc1
  781. xor $acc0,$t0,$t0
  782. ld [$key+0],$s0
  783. fmovs %f0,%f0
  784. srlx $acc2,16,$acc2 !
  785. xor $acc1,$t0,$t0
  786. ld [$key+4],$s1
  787. srlx $acc3,24,$acc3
  788. xor $acc2,$t0,$t0
  789. ld [$key+8],$s2
  790. srlx $acc5,8,$acc5
  791. xor $acc3,$t0,$t0
  792. ld [$key+12],$s3 !
  793. srlx $acc6,16,$acc6
  794. xor $acc4,$t1,$t1
  795. fmovs %f0,%f0
  796. srlx $acc7,24,$acc7
  797. xor $acc5,$t1,$t1
  798. srlx $acc9,8,$acc9
  799. xor $acc6,$t1,$t1
  800. srlx $acc10,16,$acc10 !
  801. xor $acc7,$t1,$t1
  802. srlx $acc11,24,$acc11
  803. xor $acc8,$t2,$t2
  804. srlx $acc13,8,$acc13
  805. xor $acc9,$t2,$t2
  806. srlx $acc14,16,$acc14
  807. xor $acc10,$t2,$t2
  808. srlx $acc15,24,$acc15 !
  809. xor $acc11,$t2,$t2
  810. xor $acc12,$acc14,$acc14
  811. xor $acc13,$t3,$t3
  812. srl $t0,21,$acc0
  813. xor $acc14,$t3,$t3
  814. xor $acc15,$t3,$t3
  815. srl $t3,13,$acc1
  816. and $acc0,2040,$acc0 !
  817. srl $t2,5,$acc2
  818. and $acc1,2040,$acc1
  819. ldx [$tbl+$acc0],$acc0
  820. sll $t1,3,$acc3
  821. and $acc2,2040,$acc2
  822. ldx [$tbl+$acc1],$acc1
  823. fmovs %f0,%f0
  824. srl $t1,21,$acc4 !
  825. and $acc3,2040,$acc3
  826. ldx [$tbl+$acc2],$acc2
  827. srl $t0,13,$acc5
  828. and $acc4,2040,$acc4
  829. ldx [$tbl+$acc3],$acc3
  830. srl $t3,5,$acc6
  831. and $acc5,2040,$acc5
  832. ldx [$tbl+$acc4],$acc4 !
  833. sll $t2,3,$acc7
  834. and $acc6,2040,$acc6
  835. ldx [$tbl+$acc5],$acc5
  836. srl $t2,21,$acc8
  837. and $acc7,2040,$acc7
  838. ldx [$tbl+$acc6],$acc6
  839. fmovs %f0,%f0
  840. srl $t1,13,$acc9 !
  841. and $acc8,2040,$acc8
  842. ldx [$tbl+$acc7],$acc7
  843. srl $t0,5,$acc10
  844. and $acc9,2040,$acc9
  845. ldx [$tbl+$acc8],$acc8
  846. sll $t3,3,$acc11
  847. and $acc10,2040,$acc10
  848. ldx [$tbl+$acc9],$acc9 !
  849. srl $t3,21,$acc12
  850. and $acc11,2040,$acc11
  851. ldx [$tbl+$acc10],$acc10
  852. srl $t2,13,$acc13
  853. and $acc12,2040,$acc12
  854. ldx [$tbl+$acc11],$acc11
  855. fmovs %f0,%f0
  856. srl $t1,5,$acc14 !
  857. and $acc13,2040,$acc13
  858. ldx [$tbl+$acc12],$acc12
  859. sll $t0,3,$acc15
  860. and $acc14,2040,$acc14
  861. ldx [$tbl+$acc13],$acc13
  862. srlx $acc1,8,$acc1
  863. and $acc15,2040,$acc15
  864. ldx [$tbl+$acc14],$acc14 !
  865. srlx $acc2,16,$acc2
  866. xor $acc0,$s0,$s0
  867. ldx [$tbl+$acc15],$acc15
  868. srlx $acc3,24,$acc3
  869. xor $acc1,$s0,$s0
  870. ld [$key+16],$t0
  871. fmovs %f0,%f0
  872. srlx $acc5,8,$acc5 !
  873. xor $acc2,$s0,$s0
  874. ld [$key+20],$t1
  875. srlx $acc6,16,$acc6
  876. xor $acc3,$s0,$s0
  877. ld [$key+24],$t2
  878. srlx $acc7,24,$acc7
  879. xor $acc4,$s1,$s1
  880. ld [$key+28],$t3 !
  881. srlx $acc9,8,$acc9
  882. xor $acc5,$s1,$s1
  883. ldx [$tbl+2048+0],%g0 ! prefetch td4
  884. srlx $acc10,16,$acc10
  885. xor $acc6,$s1,$s1
  886. ldx [$tbl+2048+32],%g0 ! prefetch td4
  887. srlx $acc11,24,$acc11
  888. xor $acc7,$s1,$s1
  889. ldx [$tbl+2048+64],%g0 ! prefetch td4
  890. srlx $acc13,8,$acc13
  891. xor $acc8,$s2,$s2
  892. ldx [$tbl+2048+96],%g0 ! prefetch td4
  893. srlx $acc14,16,$acc14 !
  894. xor $acc9,$s2,$s2
  895. ldx [$tbl+2048+128],%g0 ! prefetch td4
  896. srlx $acc15,24,$acc15
  897. xor $acc10,$s2,$s2
  898. ldx [$tbl+2048+160],%g0 ! prefetch td4
  899. srl $s0,21,$acc0
  900. xor $acc11,$s2,$s2
  901. ldx [$tbl+2048+192],%g0 ! prefetch td4
  902. xor $acc12,$acc14,$acc14
  903. xor $acc13,$s3,$s3
  904. ldx [$tbl+2048+224],%g0 ! prefetch td4
  905. and $acc0,2040,$acc0 !
  906. xor $acc14,$s3,$s3
  907. xor $acc15,$s3,$s3
  908. ba .Ldec_loop
  909. srl $s3,13,$acc1
  910. .align 32
  911. .Ldec_last:
  912. srlx $acc1,8,$acc1 !
  913. xor $acc0,$t0,$t0
  914. ld [$key+0],$s0
  915. srlx $acc2,16,$acc2
  916. xor $acc1,$t0,$t0
  917. ld [$key+4],$s1
  918. srlx $acc3,24,$acc3
  919. xor $acc2,$t0,$t0
  920. ld [$key+8],$s2 !
  921. srlx $acc5,8,$acc5
  922. xor $acc3,$t0,$t0
  923. ld [$key+12],$s3
  924. srlx $acc6,16,$acc6
  925. xor $acc4,$t1,$t1
  926. srlx $acc7,24,$acc7
  927. xor $acc5,$t1,$t1
  928. srlx $acc9,8,$acc9 !
  929. xor $acc6,$t1,$t1
  930. srlx $acc10,16,$acc10
  931. xor $acc7,$t1,$t1
  932. srlx $acc11,24,$acc11
  933. xor $acc8,$t2,$t2
  934. srlx $acc13,8,$acc13
  935. xor $acc9,$t2,$t2
  936. srlx $acc14,16,$acc14 !
  937. xor $acc10,$t2,$t2
  938. srlx $acc15,24,$acc15
  939. xor $acc11,$t2,$t2
  940. xor $acc12,$acc14,$acc14
  941. xor $acc13,$t3,$t3
  942. srl $t0,24,$acc0
  943. xor $acc14,$t3,$t3
  944. xor $acc15,$t3,$t3 !
  945. srl $t3,16,$acc1
  946. srl $t2,8,$acc2
  947. and $acc1,255,$acc1
  948. ldub [$rounds+$acc0],$acc0
  949. srl $t1,24,$acc4
  950. and $acc2,255,$acc2
  951. ldub [$rounds+$acc1],$acc1
  952. srl $t0,16,$acc5 !
  953. and $t1,255,$acc3
  954. ldub [$rounds+$acc2],$acc2
  955. ldub [$rounds+$acc3],$acc3
  956. srl $t3,8,$acc6
  957. and $acc5,255,$acc5
  958. ldub [$rounds+$acc4],$acc4
  959. fmovs %f0,%f0
  960. srl $t2,24,$acc8 !
  961. and $acc6,255,$acc6
  962. ldub [$rounds+$acc5],$acc5
  963. srl $t1,16,$acc9
  964. and $t2,255,$acc7
  965. ldub [$rounds+$acc6],$acc6
  966. ldub [$rounds+$acc7],$acc7
  967. fmovs %f0,%f0
  968. srl $t0,8,$acc10 !
  969. and $acc9,255,$acc9
  970. ldub [$rounds+$acc8],$acc8
  971. srl $t3,24,$acc12
  972. and $acc10,255,$acc10
  973. ldub [$rounds+$acc9],$acc9
  974. srl $t2,16,$acc13
  975. and $t3,255,$acc11
  976. ldub [$rounds+$acc10],$acc10 !
  977. srl $t1,8,$acc14
  978. and $acc13,255,$acc13
  979. ldub [$rounds+$acc11],$acc11
  980. ldub [$rounds+$acc12],$acc12
  981. and $acc14,255,$acc14
  982. ldub [$rounds+$acc13],$acc13
  983. and $t0,255,$acc15
  984. ldub [$rounds+$acc14],$acc14 !
  985. sll $acc0,24,$acc0
  986. xor $acc3,$s0,$s0
  987. ldub [$rounds+$acc15],$acc15
  988. sll $acc1,16,$acc1
  989. xor $acc0,$s0,$s0
  990. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  991. fmovs %f0,%f0
  992. sll $acc2,8,$acc2 !
  993. xor $acc1,$s0,$s0
  994. sll $acc4,24,$acc4
  995. xor $acc2,$s0,$s0
  996. sll $acc5,16,$acc5
  997. xor $acc7,$s1,$s1
  998. sll $acc6,8,$acc6
  999. xor $acc4,$s1,$s1
  1000. sll $acc8,24,$acc8 !
  1001. xor $acc5,$s1,$s1
  1002. sll $acc9,16,$acc9
  1003. xor $acc11,$s2,$s2
  1004. sll $acc10,8,$acc10
  1005. xor $acc6,$s1,$s1
  1006. sll $acc12,24,$acc12
  1007. xor $acc8,$s2,$s2
  1008. sll $acc13,16,$acc13 !
  1009. xor $acc9,$s2,$s2
  1010. sll $acc14,8,$acc14
  1011. xor $acc10,$s2,$s2
  1012. xor $acc12,$acc14,$acc14
  1013. xor $acc13,$s3,$s3
  1014. xor $acc14,$s3,$s3
  1015. xor $acc15,$s3,$s3
  1016. ret
  1017. restore
  1018. .type _sparcv9_AES_decrypt,#function
  1019. .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
  1020. .align 32
  ! AES_decrypt: exported entry point for decrypting one 16-byte block.
  ! It reads 16 bytes at the address in the first argument, passes them to
  ! _sparcv9_AES_decrypt as four 32-bit words in %o0-%o3 together with the
  ! address of AES_Td in %o4 and the third argument in %o5, and then writes
  ! the four words found in %o0-%o3 after the call as 16 bytes at the
  ! address in the second argument.
  ! NOTE(review): the arguments are presumably (in, out, key) as in the C
  ! prototype of AES_decrypt; the prototype is not visible here - confirm.
  ! If either of the first two pointers is not 4-byte aligned, the
  ! .Lunaligned_dec path does the same work with byte loads and stores.
  1021. .globl AES_decrypt
  1022. AES_decrypt:
  1023. or %o0,%o1,%g1 ! merge the low bits of both data pointers
  1024. andcc %g1,3,%g0 ! test the low two bits; only the condition codes are kept
  1025. bnz,pn %xcc,.Lunaligned_dec ! nonzero: at least one pointer is not word aligned
  1026. save %sp,-$frame,%sp ! delay slot, not annulled: new register window on both paths
  1027. ld [%i0+0],%o0 ! aligned path: load the block as four 32-bit words
  1028. ld [%i0+4],%o1
  1029. ld [%i0+8],%o2
  1030. ld [%i0+12],%o3
  1031. 1: call .+8 ! leaves the address of label 1 in %o7 and falls through
  1032. add %o7,AES_Td-1b,%o4 ! delay slot: %o4 = address of AES_Td, computed PC-relative
  1033. call _sparcv9_AES_decrypt
  1034. mov %i2,%o5 ! delay slot: forward the third argument unchanged
  1035. st %o0,[%i1+0] ! store the four result words left in %o0-%o3
  1036. st %o1,[%i1+4]
  1037. st %o2,[%i1+8]
  1038. st %o3,[%i1+12]
  1039. ret
  1040. restore ! delay slot: release the register window
  1041. .align 32
  ! Unaligned path: the register window was already allocated by the save
  ! in the delay slot of the branch above.  Each 32-bit word is assembled
  ! from four byte loads, lowest address in the most significant byte.
  ! Loads are interleaved with the shifts and ORs of previously loaded
  ! bytes, so the instruction order matters for register reuse.
  1042. .Lunaligned_dec:
  1043. ldub [%i0+0],%l0
  1044. ldub [%i0+1],%l1
  1045. ldub [%i0+2],%l2
  1046. sll %l0,24,%l0
  1047. ldub [%i0+3],%l3
  1048. sll %l1,16,%l1
  1049. ldub [%i0+4],%l4
  1050. sll %l2,8,%l2
  1051. or %l1,%l0,%l0
  1052. ldub [%i0+5],%l5
  1053. sll %l4,24,%l4
  1054. or %l3,%l2,%l2
  1055. ldub [%i0+6],%l6
  1056. sll %l5,16,%l5
  1057. or %l0,%l2,%o0 ! word 0 = input bytes 0..3
  1058. ldub [%i0+7],%l7
  1059. sll %l6,8,%l6
  1060. or %l5,%l4,%l4
  1061. ldub [%i0+8],%l0 ! %l0-%l3 are reused for bytes 8..11 from here on
  1062. or %l7,%l6,%l6
  1063. ldub [%i0+9],%l1
  1064. or %l4,%l6,%o1 ! word 1 = input bytes 4..7
  1065. ldub [%i0+10],%l2
  1066. sll %l0,24,%l0
  1067. ldub [%i0+11],%l3
  1068. sll %l1,16,%l1
  1069. ldub [%i0+12],%l4 ! %l4-%l7 are reused for bytes 12..15 from here on
  1070. sll %l2,8,%l2
  1071. or %l1,%l0,%l0
  1072. ldub [%i0+13],%l5
  1073. sll %l4,24,%l4
  1074. or %l3,%l2,%l2
  1075. ldub [%i0+14],%l6
  1076. sll %l5,16,%l5
  1077. or %l0,%l2,%o2 ! word 2 = input bytes 8..11
  1078. ldub [%i0+15],%l7
  1079. sll %l6,8,%l6
  1080. or %l5,%l4,%l4
  1081. or %l7,%l6,%l6
  1082. or %l4,%l6,%o3 ! word 3 = input bytes 12..15
  1083. 1: call .+8 ! leaves the address of label 1 in %o7 and falls through
  1084. add %o7,AES_Td-1b,%o4 ! delay slot: %o4 = address of AES_Td, computed PC-relative
  1085. call _sparcv9_AES_decrypt
  1086. mov %i2,%o5 ! delay slot: forward the third argument unchanged
  ! Write the four result words back one byte at a time, most significant
  ! byte first.  stb stores only the low 8 bits of its source register, so
  ! the shifted values need no masking.
  1087. srl %o0,24,%l0
  1088. srl %o0,16,%l1
  1089. stb %l0,[%i1+0]
  1090. srl %o0,8,%l2
  1091. stb %l1,[%i1+1]
  1092. stb %l2,[%i1+2]
  1093. srl %o1,24,%l4
  1094. stb %o0,[%i1+3] ! low byte of word 0
  1095. srl %o1,16,%l5
  1096. stb %l4,[%i1+4]
  1097. srl %o1,8,%l6
  1098. stb %l5,[%i1+5]
  1099. stb %l6,[%i1+6]
  1100. srl %o2,24,%l0
  1101. stb %o1,[%i1+7] ! low byte of word 1
  1102. srl %o2,16,%l1
  1103. stb %l0,[%i1+8]
  1104. srl %o2,8,%l2
  1105. stb %l1,[%i1+9]
  1106. stb %l2,[%i1+10]
  1107. srl %o3,24,%l4
  1108. stb %o2,[%i1+11] ! low byte of word 2
  1109. srl %o3,16,%l5
  1110. stb %l4,[%i1+12]
  1111. srl %o3,8,%l6
  1112. stb %l5,[%i1+13]
  1113. stb %l6,[%i1+14]
  1114. stb %o3,[%i1+15] ! low byte of word 3
  1115. ret
  1116. restore ! delay slot: release the register window
  1117. .type AES_decrypt,#function
  1118. .size AES_decrypt,(.-AES_decrypt)
  1119. ___
  # The fmovs instructions act as FP nops; they were originally added to
  # meet specific instruction alignment requirements and so maximize ILP.
  # UltraSPARC T1, a.k.a. Niagara, has a shared FPU, so FP nops can have
  # an undesired effect there: just omit them and sacrifice a fraction of
  # a percent in performance...
  $code =~ s/fmovs.*$//gm;

  print $code;

  # A failed flush (full disk, closed pipe, ...) is only reported when the
  # handle is closed, so a failing close must abort the script; otherwise
  # a truncated assembly file would be left behind with exit status 0.
  close STDOUT or die "error closing STDOUT: $!";