aes-sparcv9.pl 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162
  1. #!/usr/bin/env perl
  2. #
  3. # ====================================================================
  4. # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
  5. # project. Rights for redistribution and usage in source and binary
  6. # forms are granted according to the OpenSSL license.
  7. # ====================================================================
  8. #
  9. # Version 1.0
  10. #
  11. # The major reason for undertaking this effort was to mitigate the hazard
  12. # of cache-timing attacks. This is [currently and initially!] addressed in
  13. # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B each.
  14. # 2. References to them are scheduled for L2 cache latency, meaning
  15. # that the tables don't have to reside in L1 cache. Once again, this
  16. # is an initial draft and one should expect more countermeasures to
  17. # be implemented...
  18. #
  19. # Even though performance was not the primary goal [on the contrary,
  20. # extra shifts "induced" by the compressed S-box and the longer loop epilogue
  21. # "induced" by scheduling for L2 have a negative effect on performance],
  22. # the code turned out to run in ~23 cycles per processed byte en-/
  23. # decrypted with a 128-bit key. This is a pretty good result for code
  24. # with the mentioned qualities on an UltraSPARC core. Compared to Sun C
  25. # generated code my encrypt procedure runs just a few percent faster,
  26. # while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
  27. # an optimal decrypt procedure]. Compared to GNU C generated code both
  28. # procedures are more than 60% faster:-)
  # Pick the target ABI from the command line: any argument that contains
  # "-m64" or "-xarch=v9" selects a 64-bit build, otherwise 32-bit is used.
  $bits = (grep { /\-m64/ || /\-xarch\=v9/ } @ARGV) ? 64 : 32;

  # Stack bias and base frame size that go with the selected ABI; both are
  # interpolated into the "save" and stack-slot operands of the assembly.
  ($bias, $frame) = ($bits == 64) ? (2047, 192) : (0, 112);

  # Extra bytes reserved on top of the base frame; the internal routines
  # keep the off-loaded return address at [%sp+$bias+$frame+0].
  $locals = 16;
  # Symbolic register names interpolated into the assembly templates.

  # Sixteen accumulators: one per table look-up performed in a round.
  ($acc0,  $acc1,  $acc2,  $acc3)  = ("%l0", "%o0", "%o1", "%o2");
  ($acc4,  $acc5,  $acc6,  $acc7)  = ("%l1", "%o3", "%o4", "%o5");
  ($acc8,  $acc9,  $acc10, $acc11) = ("%l2", "%o7", "%g1", "%g2");
  ($acc12, $acc13, $acc14, $acc15) = ("%l3", "%g3", "%g4", "%g5");

  # Two banks of four state words; the round loops alternate between them.
  ($t0, $t1, $t2, $t3) = ("%l4", "%l5", "%l6", "%l7");
  ($s0, $s1, $s2, $s3) = ("%i0", "%i1", "%i2", "%i3");

  # Table base address and key-schedule pointer.
  ($tbl, $key) = ("%i4", "%i5");

  $rounds = "%i7";   # aliases with return address, which is off-loaded to stack
  # _data_word(LIST)
  #
  # Appends one assembler line per argument to the global $code buffer.  Each
  # argument is formatted as a 32-bit hex word and written twice on the same
  # ".long" line, so every table entry occupies 8 bytes.
  #
  # Processing stops at the first undefined argument.  Nothing useful is
  # returned; the sub is called for its side effect on $code.
  #
  # The sub used to be declared with an empty prototype "()", which states
  # that it takes no arguments.  That only worked because every call uses the
  # "&_data_word(...)" form, which bypasses prototype checking.  The prototype
  # is dropped so the declaration matches how the sub is actually used.
  sub _data_word
  {
      foreach my $word (@_) {
          last unless defined $word;    # same stop-at-undef rule as before
          $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
      }
      return;
  }
  # 64-bit builds only: emit .register directives declaring %g2 and %g3 as
  # scratch.  Both registers are used as accumulators ($acc11 and $acc13).
  65. $code.=<<___ if ($bits==64);
  66. .register %g2,#scratch
  67. .register %g3,#scratch
  68. ___
  # Open the .text section and place the 64-byte-aligned AES_Te label.  The
  # table contents are appended by the _data_word() call that follows.
  69. $code.=<<___;
  70. .section ".text",#alloc,#execinstr
  71. .align 64
  72. AES_Te:
  73. ___
  # AES_Te contents: 256 32-bit words (64 rows of 4).  _data_word() writes
  # every word twice, so each entry is 8 bytes and the table is 2048 bytes.
  # The encrypt loop indexes it with offsets masked by 2040, loads entries
  # with ldx and shifts them right by 8/16/24 bits (srlx) before XOR-ing.
  74. &_data_word(
  75. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  76. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  77. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  78. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  79. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  80. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  81. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  82. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  83. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  84. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  85. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  86. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  87. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  88. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  89. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  90. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  91. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  92. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  93. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  94. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  95. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  96. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  97. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  98. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  99. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  100. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  101. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  102. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  103. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  104. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  105. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  106. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  107. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  108. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  109. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  110. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  111. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  112. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  113. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  114. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  115. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  116. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  117. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  118. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  119. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  120. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  121. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  122. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  123. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  124. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  125. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  126. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  127. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  128. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  129. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  130. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  131. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  132. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  133. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  134. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  135. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  136. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  137. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  138. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  # Everything up to the closing ___ is appended to $code as assembly text:
  #
  #  * 256 bytes of .byte data placed directly after the 2048-byte AES_Te
  #    table; the last round addresses them as $tbl+2048 and reads them with
  #    ldub.
  #
  #  * _sparcv9_AES_encrypt - internal routine.  It takes the four state
  #    words in $s0-$s3, the table address in $tbl and the key schedule in
  #    $key.  It reads a count from [$key+240], halves it, and performs two
  #    rounds per .Lenc_loop iteration, alternating between the $s0-$s3 and
  #    $t0-$t3 banks.  The result is left in $s0-$s3.  %i7 is stored on the
  #    stack on entry and reloaded before "ret" because $rounds reuses it.
  #
  #  * AES_encrypt - global entry point; %o0 is read as the input pointer,
  #    %o1 as the output pointer and %o2 as the key pointer.  If either data
  #    pointer is not 4-byte aligned it branches to .Lunaligned_enc, which
  #    loads and stores the block one byte at a time; otherwise it uses word
  #    loads and stores.  Both paths compute the AES_Te address from the
  #    call's %o7 in the instruction following the call.
  139. $code.=<<___;
  140. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  141. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  142. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  143. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  144. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  145. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  146. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  147. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  148. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  149. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  150. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  151. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  152. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  153. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  154. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  155. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  156. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  157. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  158. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  159. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  160. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  161. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  162. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  163. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  164. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  165. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  166. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  167. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  168. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  169. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  170. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  171. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  172. .type AES_Te,#object
  173. .size AES_Te,(.-AES_Te)
  174. .align 64
  175. .skip 16
  176. _sparcv9_AES_encrypt:
  177. save %sp,-$frame-$locals,%sp
  178. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  179. ld [$key+240],$rounds
  180. ld [$key+0],$t0
  181. ld [$key+4],$t1 !
  182. ld [$key+8],$t2
  183. srl $rounds,1,$rounds
  184. xor $t0,$s0,$s0
  185. ld [$key+12],$t3
  186. srl $s0,21,$acc0
  187. xor $t1,$s1,$s1
  188. ld [$key+16],$t0
  189. srl $s1,13,$acc1 !
  190. xor $t2,$s2,$s2
  191. ld [$key+20],$t1
  192. xor $t3,$s3,$s3
  193. ld [$key+24],$t2
  194. and $acc0,2040,$acc0
  195. ld [$key+28],$t3
  196. nop
  197. .Lenc_loop:
  198. srl $s2,5,$acc2 !
  199. and $acc1,2040,$acc1
  200. ldx [$tbl+$acc0],$acc0
  201. sll $s3,3,$acc3
  202. and $acc2,2040,$acc2
  203. ldx [$tbl+$acc1],$acc1
  204. srl $s1,21,$acc4
  205. and $acc3,2040,$acc3
  206. ldx [$tbl+$acc2],$acc2 !
  207. srl $s2,13,$acc5
  208. and $acc4,2040,$acc4
  209. ldx [$tbl+$acc3],$acc3
  210. srl $s3,5,$acc6
  211. and $acc5,2040,$acc5
  212. ldx [$tbl+$acc4],$acc4
  213. fmovs %f0,%f0
  214. sll $s0,3,$acc7 !
  215. and $acc6,2040,$acc6
  216. ldx [$tbl+$acc5],$acc5
  217. srl $s2,21,$acc8
  218. and $acc7,2040,$acc7
  219. ldx [$tbl+$acc6],$acc6
  220. srl $s3,13,$acc9
  221. and $acc8,2040,$acc8
  222. ldx [$tbl+$acc7],$acc7 !
  223. srl $s0,5,$acc10
  224. and $acc9,2040,$acc9
  225. ldx [$tbl+$acc8],$acc8
  226. sll $s1,3,$acc11
  227. and $acc10,2040,$acc10
  228. ldx [$tbl+$acc9],$acc9
  229. fmovs %f0,%f0
  230. srl $s3,21,$acc12 !
  231. and $acc11,2040,$acc11
  232. ldx [$tbl+$acc10],$acc10
  233. srl $s0,13,$acc13
  234. and $acc12,2040,$acc12
  235. ldx [$tbl+$acc11],$acc11
  236. srl $s1,5,$acc14
  237. and $acc13,2040,$acc13
  238. ldx [$tbl+$acc12],$acc12 !
  239. sll $s2,3,$acc15
  240. and $acc14,2040,$acc14
  241. ldx [$tbl+$acc13],$acc13
  242. and $acc15,2040,$acc15
  243. add $key,32,$key
  244. ldx [$tbl+$acc14],$acc14
  245. fmovs %f0,%f0
  246. subcc $rounds,1,$rounds !
  247. ldx [$tbl+$acc15],$acc15
  248. bz,a,pn %icc,.Lenc_last
  249. add $tbl,2048,$rounds
  250. srlx $acc1,8,$acc1
  251. xor $acc0,$t0,$t0
  252. ld [$key+0],$s0
  253. fmovs %f0,%f0
  254. srlx $acc2,16,$acc2 !
  255. xor $acc1,$t0,$t0
  256. ld [$key+4],$s1
  257. srlx $acc3,24,$acc3
  258. xor $acc2,$t0,$t0
  259. ld [$key+8],$s2
  260. srlx $acc5,8,$acc5
  261. xor $acc3,$t0,$t0
  262. ld [$key+12],$s3 !
  263. srlx $acc6,16,$acc6
  264. xor $acc4,$t1,$t1
  265. fmovs %f0,%f0
  266. srlx $acc7,24,$acc7
  267. xor $acc5,$t1,$t1
  268. srlx $acc9,8,$acc9
  269. xor $acc6,$t1,$t1
  270. srlx $acc10,16,$acc10 !
  271. xor $acc7,$t1,$t1
  272. srlx $acc11,24,$acc11
  273. xor $acc8,$t2,$t2
  274. srlx $acc13,8,$acc13
  275. xor $acc9,$t2,$t2
  276. srlx $acc14,16,$acc14
  277. xor $acc10,$t2,$t2
  278. srlx $acc15,24,$acc15 !
  279. xor $acc11,$t2,$t2
  280. xor $acc12,$acc14,$acc14
  281. xor $acc13,$t3,$t3
  282. srl $t0,21,$acc0
  283. xor $acc14,$t3,$t3
  284. srl $t1,13,$acc1
  285. xor $acc15,$t3,$t3
  286. and $acc0,2040,$acc0 !
  287. srl $t2,5,$acc2
  288. and $acc1,2040,$acc1
  289. ldx [$tbl+$acc0],$acc0
  290. sll $t3,3,$acc3
  291. and $acc2,2040,$acc2
  292. ldx [$tbl+$acc1],$acc1
  293. fmovs %f0,%f0
  294. srl $t1,21,$acc4 !
  295. and $acc3,2040,$acc3
  296. ldx [$tbl+$acc2],$acc2
  297. srl $t2,13,$acc5
  298. and $acc4,2040,$acc4
  299. ldx [$tbl+$acc3],$acc3
  300. srl $t3,5,$acc6
  301. and $acc5,2040,$acc5
  302. ldx [$tbl+$acc4],$acc4 !
  303. sll $t0,3,$acc7
  304. and $acc6,2040,$acc6
  305. ldx [$tbl+$acc5],$acc5
  306. srl $t2,21,$acc8
  307. and $acc7,2040,$acc7
  308. ldx [$tbl+$acc6],$acc6
  309. fmovs %f0,%f0
  310. srl $t3,13,$acc9 !
  311. and $acc8,2040,$acc8
  312. ldx [$tbl+$acc7],$acc7
  313. srl $t0,5,$acc10
  314. and $acc9,2040,$acc9
  315. ldx [$tbl+$acc8],$acc8
  316. sll $t1,3,$acc11
  317. and $acc10,2040,$acc10
  318. ldx [$tbl+$acc9],$acc9 !
  319. srl $t3,21,$acc12
  320. and $acc11,2040,$acc11
  321. ldx [$tbl+$acc10],$acc10
  322. srl $t0,13,$acc13
  323. and $acc12,2040,$acc12
  324. ldx [$tbl+$acc11],$acc11
  325. fmovs %f0,%f0
  326. srl $t1,5,$acc14 !
  327. and $acc13,2040,$acc13
  328. ldx [$tbl+$acc12],$acc12
  329. sll $t2,3,$acc15
  330. and $acc14,2040,$acc14
  331. ldx [$tbl+$acc13],$acc13
  332. srlx $acc1,8,$acc1
  333. and $acc15,2040,$acc15
  334. ldx [$tbl+$acc14],$acc14 !
  335. srlx $acc2,16,$acc2
  336. xor $acc0,$s0,$s0
  337. ldx [$tbl+$acc15],$acc15
  338. srlx $acc3,24,$acc3
  339. xor $acc1,$s0,$s0
  340. ld [$key+16],$t0
  341. fmovs %f0,%f0
  342. srlx $acc5,8,$acc5 !
  343. xor $acc2,$s0,$s0
  344. ld [$key+20],$t1
  345. srlx $acc6,16,$acc6
  346. xor $acc3,$s0,$s0
  347. ld [$key+24],$t2
  348. srlx $acc7,24,$acc7
  349. xor $acc4,$s1,$s1
  350. ld [$key+28],$t3 !
  351. srlx $acc9,8,$acc9
  352. xor $acc5,$s1,$s1
  353. srlx $acc10,16,$acc10
  354. xor $acc6,$s1,$s1
  355. srlx $acc11,24,$acc11
  356. xor $acc7,$s1,$s1
  357. srlx $acc13,8,$acc13
  358. xor $acc8,$s2,$s2
  359. srlx $acc14,16,$acc14 !
  360. xor $acc9,$s2,$s2
  361. srlx $acc15,24,$acc15
  362. xor $acc10,$s2,$s2
  363. srl $s0,21,$acc0
  364. xor $acc11,$s2,$s2
  365. xor $acc12,$acc14,$acc14
  366. xor $acc13,$s3,$s3
  367. srl $s1,13,$acc1 !
  368. xor $acc14,$s3,$s3
  369. xor $acc15,$s3,$s3
  370. ba .Lenc_loop
  371. and $acc0,2040,$acc0
  372. .align 32
  373. .Lenc_last:
  374. srlx $acc1,8,$acc1 !
  375. xor $acc0,$t0,$t0
  376. ld [$key+0],$s0
  377. srlx $acc2,16,$acc2
  378. xor $acc1,$t0,$t0
  379. ld [$key+4],$s1
  380. srlx $acc3,24,$acc3
  381. xor $acc2,$t0,$t0
  382. ld [$key+8],$s2 !
  383. srlx $acc5,8,$acc5
  384. xor $acc3,$t0,$t0
  385. ld [$key+12],$s3
  386. srlx $acc6,16,$acc6
  387. xor $acc4,$t1,$t1
  388. srlx $acc7,24,$acc7
  389. xor $acc5,$t1,$t1
  390. srlx $acc9,8,$acc9 !
  391. xor $acc6,$t1,$t1
  392. srlx $acc10,16,$acc10
  393. xor $acc7,$t1,$t1
  394. srlx $acc11,24,$acc11
  395. xor $acc8,$t2,$t2
  396. srlx $acc13,8,$acc13
  397. xor $acc9,$t2,$t2
  398. srlx $acc14,16,$acc14 !
  399. xor $acc10,$t2,$t2
  400. srlx $acc15,24,$acc15
  401. xor $acc11,$t2,$t2
  402. xor $acc12,$acc14,$acc14
  403. xor $acc13,$t3,$t3
  404. srl $t0,24,$acc0
  405. xor $acc14,$t3,$t3
  406. srl $t1,16,$acc1 !
  407. xor $acc15,$t3,$t3
  408. srl $t2,8,$acc2
  409. and $acc1,255,$acc1
  410. ldub [$rounds+$acc0],$acc0
  411. srl $t1,24,$acc4
  412. and $acc2,255,$acc2
  413. ldub [$rounds+$acc1],$acc1
  414. srl $t2,16,$acc5 !
  415. and $t3,255,$acc3
  416. ldub [$rounds+$acc2],$acc2
  417. ldub [$rounds+$acc3],$acc3
  418. srl $t3,8,$acc6
  419. and $acc5,255,$acc5
  420. ldub [$rounds+$acc4],$acc4
  421. fmovs %f0,%f0
  422. srl $t2,24,$acc8 !
  423. and $acc6,255,$acc6
  424. ldub [$rounds+$acc5],$acc5
  425. srl $t3,16,$acc9
  426. and $t0,255,$acc7
  427. ldub [$rounds+$acc6],$acc6
  428. ldub [$rounds+$acc7],$acc7
  429. fmovs %f0,%f0
  430. srl $t0,8,$acc10 !
  431. and $acc9,255,$acc9
  432. ldub [$rounds+$acc8],$acc8
  433. srl $t3,24,$acc12
  434. and $acc10,255,$acc10
  435. ldub [$rounds+$acc9],$acc9
  436. srl $t0,16,$acc13
  437. and $t1,255,$acc11
  438. ldub [$rounds+$acc10],$acc10 !
  439. srl $t1,8,$acc14
  440. and $acc13,255,$acc13
  441. ldub [$rounds+$acc11],$acc11
  442. ldub [$rounds+$acc12],$acc12
  443. and $acc14,255,$acc14
  444. ldub [$rounds+$acc13],$acc13
  445. and $t2,255,$acc15
  446. ldub [$rounds+$acc14],$acc14 !
  447. sll $acc0,24,$acc0
  448. xor $acc3,$s0,$s0
  449. ldub [$rounds+$acc15],$acc15
  450. sll $acc1,16,$acc1
  451. xor $acc0,$s0,$s0
  452. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  453. fmovs %f0,%f0
  454. sll $acc2,8,$acc2 !
  455. xor $acc1,$s0,$s0
  456. sll $acc4,24,$acc4
  457. xor $acc2,$s0,$s0
  458. sll $acc5,16,$acc5
  459. xor $acc7,$s1,$s1
  460. sll $acc6,8,$acc6
  461. xor $acc4,$s1,$s1
  462. sll $acc8,24,$acc8 !
  463. xor $acc5,$s1,$s1
  464. sll $acc9,16,$acc9
  465. xor $acc11,$s2,$s2
  466. sll $acc10,8,$acc10
  467. xor $acc6,$s1,$s1
  468. sll $acc12,24,$acc12
  469. xor $acc8,$s2,$s2
  470. sll $acc13,16,$acc13 !
  471. xor $acc9,$s2,$s2
  472. sll $acc14,8,$acc14
  473. xor $acc10,$s2,$s2
  474. xor $acc12,$acc14,$acc14
  475. xor $acc13,$s3,$s3
  476. xor $acc14,$s3,$s3
  477. xor $acc15,$s3,$s3
  478. ret
  479. restore
  480. .type _sparcv9_AES_encrypt,#function
  481. .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
  482. .align 32
  483. .globl AES_encrypt
  484. AES_encrypt:
  485. or %o0,%o1,%g1
  486. andcc %g1,3,%g0
  487. bnz,pn %xcc,.Lunaligned_enc
  488. save %sp,-$frame,%sp
  489. ld [%i0+0],%o0
  490. ld [%i0+4],%o1
  491. ld [%i0+8],%o2
  492. ld [%i0+12],%o3
  493. mov %i2,%o5
  494. nop
  495. 1: call _sparcv9_AES_encrypt
  496. sub %o7,1b-AES_Te,%o4
  497. st %o0,[%i1+0]
  498. st %o1,[%i1+4]
  499. st %o2,[%i1+8]
  500. st %o3,[%i1+12]
  501. ret
  502. restore
  503. .align 32
  504. .Lunaligned_enc:
  505. ldub [%i0+0],%l0
  506. ldub [%i0+1],%l1
  507. ldub [%i0+2],%l2
  508. sll %l0,24,%l0
  509. ldub [%i0+3],%l3
  510. sll %l1,16,%l1
  511. ldub [%i0+4],%l4
  512. sll %l2,8,%l2
  513. or %l1,%l0,%l0
  514. ldub [%i0+5],%l5
  515. sll %l4,24,%l4
  516. or %l3,%l2,%l2
  517. ldub [%i0+6],%l6
  518. sll %l5,16,%l5
  519. or %l0,%l2,%o0
  520. ldub [%i0+7],%l7
  521. sll %l6,8,%l6
  522. or %l5,%l4,%l4
  523. ldub [%i0+8],%l0
  524. or %l7,%l6,%l6
  525. ldub [%i0+9],%l1
  526. or %l4,%l6,%o1
  527. ldub [%i0+10],%l2
  528. sll %l0,24,%l0
  529. ldub [%i0+11],%l3
  530. sll %l1,16,%l1
  531. ldub [%i0+12],%l4
  532. sll %l2,8,%l2
  533. or %l1,%l0,%l0
  534. ldub [%i0+13],%l5
  535. sll %l4,24,%l4
  536. or %l3,%l2,%l2
  537. ldub [%i0+14],%l6
  538. sll %l5,16,%l5
  539. or %l0,%l2,%o2
  540. ldub [%i0+15],%l7
  541. sll %l6,8,%l6
  542. or %l5,%l4,%l4
  543. or %l7,%l6,%l6
  544. or %l4,%l6,%o3
  545. mov %i2,%o5
  546. nop
  547. 1: call _sparcv9_AES_encrypt
  548. sub %o7,1b-AES_Te,%o4
  549. srl %o0,24,%l0
  550. srl %o0,16,%l1
  551. stb %l0,[%i1+0]
  552. srl %o0,8,%l2
  553. stb %l1,[%i1+1]
  554. stb %l2,[%i1+2]
  555. srl %o1,24,%l4
  556. stb %o0,[%i1+3]
  557. srl %o1,16,%l5
  558. stb %l4,[%i1+4]
  559. srl %o1,8,%l6
  560. stb %l5,[%i1+5]
  561. stb %l6,[%i1+6]
  562. srl %o2,24,%l0
  563. stb %o1,[%i1+7]
  564. srl %o2,16,%l1
  565. stb %l0,[%i1+8]
  566. srl %o2,8,%l2
  567. stb %l1,[%i1+9]
  568. stb %l2,[%i1+10]
  569. srl %o3,24,%l4
  570. stb %o2,[%i1+11]
  571. srl %o3,16,%l5
  572. stb %l4,[%i1+12]
  573. srl %o3,8,%l6
  574. stb %l5,[%i1+13]
  575. stb %l6,[%i1+14]
  576. stb %o3,[%i1+15]
  577. ret
  578. restore
  579. .type AES_encrypt,#function
  580. .size AES_encrypt,(.-AES_encrypt)
  581. ___
  # Place the 64-byte-aligned AES_Td label.  The table contents are appended
  # by the _data_word() call that follows.
  582. $code.=<<___;
  583. .align 64
  584. AES_Td:
  585. ___
  # AES_Td contents: 256 32-bit words (64 rows of 4), emitted by _data_word()
  # in the same doubled, 8-bytes-per-entry layout as AES_Te (2048 bytes).
  586. &_data_word(
  587. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  588. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  589. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  590. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  591. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  592. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  593. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  594. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  595. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  596. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  597. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  598. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  599. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  600. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  601. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  602. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  603. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  604. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  605. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  606. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  607. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  608. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  609. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  610. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  611. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  612. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  613. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  614. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  615. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  616. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  617. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  618. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  619. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  620. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  621. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  622. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  623. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  624. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  625. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  626. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  627. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  628. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  629. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  630. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  631. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  632. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  633. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  634. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  635. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  636. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  637. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  638. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  639. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  640. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  641. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  642. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  643. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  644. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  645. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  646. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  647. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  648. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  649. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  650. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  651. $code.=<<___;
  652. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  653. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  654. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  655. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  656. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  657. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  658. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  659. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  660. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  661. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  662. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  663. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  664. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  665. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  666. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  667. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  668. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  669. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  670. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  671. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  672. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  673. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  674. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  675. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  676. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  677. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  678. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  679. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  680. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  681. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  682. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  683. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  684. .type AES_Td,#object
  685. .size AES_Td,(.-AES_Td)
  686. .align 64
  687. .skip 16
  688. _sparcv9_AES_decrypt:
  689. save %sp,-$frame-$locals,%sp
  690. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  691. ld [$key+240],$rounds
  692. ld [$key+0],$t0
  693. ld [$key+4],$t1 !
  694. ld [$key+8],$t2
  695. ld [$key+12],$t3
  696. srl $rounds,1,$rounds
  697. xor $t0,$s0,$s0
  698. ld [$key+16],$t0
  699. xor $t1,$s1,$s1
  700. ld [$key+20],$t1
  701. srl $s0,21,$acc0 !
  702. xor $t2,$s2,$s2
  703. ld [$key+24],$t2
  704. xor $t3,$s3,$s3
  705. and $acc0,2040,$acc0
  706. ld [$key+28],$t3
  707. srl $s3,13,$acc1
  708. nop
  709. .Ldec_loop:
  710. srl $s2,5,$acc2 !
  711. and $acc1,2040,$acc1
  712. ldx [$tbl+$acc0],$acc0
  713. sll $s1,3,$acc3
  714. and $acc2,2040,$acc2
  715. ldx [$tbl+$acc1],$acc1
  716. srl $s1,21,$acc4
  717. and $acc3,2040,$acc3
  718. ldx [$tbl+$acc2],$acc2 !
  719. srl $s0,13,$acc5
  720. and $acc4,2040,$acc4
  721. ldx [$tbl+$acc3],$acc3
  722. srl $s3,5,$acc6
  723. and $acc5,2040,$acc5
  724. ldx [$tbl+$acc4],$acc4
  725. fmovs %f0,%f0
  726. sll $s2,3,$acc7 !
  727. and $acc6,2040,$acc6
  728. ldx [$tbl+$acc5],$acc5
  729. srl $s2,21,$acc8
  730. and $acc7,2040,$acc7
  731. ldx [$tbl+$acc6],$acc6
  732. srl $s1,13,$acc9
  733. and $acc8,2040,$acc8
  734. ldx [$tbl+$acc7],$acc7 !
  735. srl $s0,5,$acc10
  736. and $acc9,2040,$acc9
  737. ldx [$tbl+$acc8],$acc8
  738. sll $s3,3,$acc11
  739. and $acc10,2040,$acc10
  740. ldx [$tbl+$acc9],$acc9
  741. fmovs %f0,%f0
  742. srl $s3,21,$acc12 !
  743. and $acc11,2040,$acc11
  744. ldx [$tbl+$acc10],$acc10
  745. srl $s2,13,$acc13
  746. and $acc12,2040,$acc12
  747. ldx [$tbl+$acc11],$acc11
  748. srl $s1,5,$acc14
  749. and $acc13,2040,$acc13
  750. ldx [$tbl+$acc12],$acc12 !
  751. sll $s0,3,$acc15
  752. and $acc14,2040,$acc14
  753. ldx [$tbl+$acc13],$acc13
  754. and $acc15,2040,$acc15
  755. add $key,32,$key
  756. ldx [$tbl+$acc14],$acc14
  757. fmovs %f0,%f0
  758. subcc $rounds,1,$rounds !
  759. ldx [$tbl+$acc15],$acc15
  760. bz,a,pn %icc,.Ldec_last
  761. add $tbl,2048,$rounds
  762. srlx $acc1,8,$acc1
  763. xor $acc0,$t0,$t0
  764. ld [$key+0],$s0
  765. fmovs %f0,%f0
  766. srlx $acc2,16,$acc2 !
  767. xor $acc1,$t0,$t0
  768. ld [$key+4],$s1
  769. srlx $acc3,24,$acc3
  770. xor $acc2,$t0,$t0
  771. ld [$key+8],$s2
  772. srlx $acc5,8,$acc5
  773. xor $acc3,$t0,$t0
  774. ld [$key+12],$s3 !
  775. srlx $acc6,16,$acc6
  776. xor $acc4,$t1,$t1
  777. fmovs %f0,%f0
  778. srlx $acc7,24,$acc7
  779. xor $acc5,$t1,$t1
  780. srlx $acc9,8,$acc9
  781. xor $acc6,$t1,$t1
  782. srlx $acc10,16,$acc10 !
  783. xor $acc7,$t1,$t1
  784. srlx $acc11,24,$acc11
  785. xor $acc8,$t2,$t2
  786. srlx $acc13,8,$acc13
  787. xor $acc9,$t2,$t2
  788. srlx $acc14,16,$acc14
  789. xor $acc10,$t2,$t2
  790. srlx $acc15,24,$acc15 !
  791. xor $acc11,$t2,$t2
  792. xor $acc12,$acc14,$acc14
  793. xor $acc13,$t3,$t3
  794. srl $t0,21,$acc0
  795. xor $acc14,$t3,$t3
  796. xor $acc15,$t3,$t3
  797. srl $t3,13,$acc1
  798. and $acc0,2040,$acc0 !
  799. srl $t2,5,$acc2
  800. and $acc1,2040,$acc1
  801. ldx [$tbl+$acc0],$acc0
  802. sll $t1,3,$acc3
  803. and $acc2,2040,$acc2
  804. ldx [$tbl+$acc1],$acc1
  805. fmovs %f0,%f0
  806. srl $t1,21,$acc4 !
  807. and $acc3,2040,$acc3
  808. ldx [$tbl+$acc2],$acc2
  809. srl $t0,13,$acc5
  810. and $acc4,2040,$acc4
  811. ldx [$tbl+$acc3],$acc3
  812. srl $t3,5,$acc6
  813. and $acc5,2040,$acc5
  814. ldx [$tbl+$acc4],$acc4 !
  815. sll $t2,3,$acc7
  816. and $acc6,2040,$acc6
  817. ldx [$tbl+$acc5],$acc5
  818. srl $t2,21,$acc8
  819. and $acc7,2040,$acc7
  820. ldx [$tbl+$acc6],$acc6
  821. fmovs %f0,%f0
  822. srl $t1,13,$acc9 !
  823. and $acc8,2040,$acc8
  824. ldx [$tbl+$acc7],$acc7
  825. srl $t0,5,$acc10
  826. and $acc9,2040,$acc9
  827. ldx [$tbl+$acc8],$acc8
  828. sll $t3,3,$acc11
  829. and $acc10,2040,$acc10
  830. ldx [$tbl+$acc9],$acc9 !
  831. srl $t3,21,$acc12
  832. and $acc11,2040,$acc11
  833. ldx [$tbl+$acc10],$acc10
  834. srl $t2,13,$acc13
  835. and $acc12,2040,$acc12
  836. ldx [$tbl+$acc11],$acc11
  837. fmovs %f0,%f0
  838. srl $t1,5,$acc14 !
  839. and $acc13,2040,$acc13
  840. ldx [$tbl+$acc12],$acc12
  841. sll $t0,3,$acc15
  842. and $acc14,2040,$acc14
  843. ldx [$tbl+$acc13],$acc13
  844. srlx $acc1,8,$acc1
  845. and $acc15,2040,$acc15
  846. ldx [$tbl+$acc14],$acc14 !
  847. srlx $acc2,16,$acc2
  848. xor $acc0,$s0,$s0
  849. ldx [$tbl+$acc15],$acc15
  850. srlx $acc3,24,$acc3
  851. xor $acc1,$s0,$s0
  852. ld [$key+16],$t0
  853. fmovs %f0,%f0
  854. srlx $acc5,8,$acc5 !
  855. xor $acc2,$s0,$s0
  856. ld [$key+20],$t1
  857. srlx $acc6,16,$acc6
  858. xor $acc3,$s0,$s0
  859. ld [$key+24],$t2
  860. srlx $acc7,24,$acc7
  861. xor $acc4,$s1,$s1
  862. ld [$key+28],$t3 !
  863. srlx $acc9,8,$acc9
  864. xor $acc5,$s1,$s1
  865. srlx $acc10,16,$acc10
  866. xor $acc6,$s1,$s1
  867. srlx $acc11,24,$acc11
  868. xor $acc7,$s1,$s1
  869. srlx $acc13,8,$acc13
  870. xor $acc8,$s2,$s2
  871. srlx $acc14,16,$acc14 !
  872. xor $acc9,$s2,$s2
  873. srlx $acc15,24,$acc15
  874. xor $acc10,$s2,$s2
  875. srl $s0,21,$acc0
  876. xor $acc11,$s2,$s2
  877. xor $acc12,$acc14,$acc14
  878. xor $acc13,$s3,$s3
  879. and $acc0,2040,$acc0 !
  880. xor $acc14,$s3,$s3
  881. xor $acc15,$s3,$s3
  882. ba .Ldec_loop
  883. srl $s3,13,$acc1
  884. .align 32
  885. .Ldec_last:
  886. srlx $acc1,8,$acc1 !
  887. xor $acc0,$t0,$t0
  888. ld [$key+0],$s0
  889. srlx $acc2,16,$acc2
  890. xor $acc1,$t0,$t0
  891. ld [$key+4],$s1
  892. srlx $acc3,24,$acc3
  893. xor $acc2,$t0,$t0
  894. ld [$key+8],$s2 !
  895. srlx $acc5,8,$acc5
  896. xor $acc3,$t0,$t0
  897. ld [$key+12],$s3
  898. srlx $acc6,16,$acc6
  899. xor $acc4,$t1,$t1
  900. srlx $acc7,24,$acc7
  901. xor $acc5,$t1,$t1
  902. srlx $acc9,8,$acc9 !
  903. xor $acc6,$t1,$t1
  904. srlx $acc10,16,$acc10
  905. xor $acc7,$t1,$t1
  906. srlx $acc11,24,$acc11
  907. xor $acc8,$t2,$t2
  908. srlx $acc13,8,$acc13
  909. xor $acc9,$t2,$t2
  910. srlx $acc14,16,$acc14 !
  911. xor $acc10,$t2,$t2
  912. srlx $acc15,24,$acc15
  913. xor $acc11,$t2,$t2
  914. xor $acc12,$acc14,$acc14
  915. xor $acc13,$t3,$t3
  916. srl $t0,24,$acc0
  917. xor $acc14,$t3,$t3
  918. xor $acc15,$t3,$t3 !
  919. srl $t3,16,$acc1
  920. srl $t2,8,$acc2
  921. and $acc1,255,$acc1
  922. ldub [$rounds+$acc0],$acc0
  923. srl $t1,24,$acc4
  924. and $acc2,255,$acc2
  925. ldub [$rounds+$acc1],$acc1
  926. srl $t0,16,$acc5 !
  927. and $t1,255,$acc3
  928. ldub [$rounds+$acc2],$acc2
  929. ldub [$rounds+$acc3],$acc3
  930. srl $t3,8,$acc6
  931. and $acc5,255,$acc5
  932. ldub [$rounds+$acc4],$acc4
  933. fmovs %f0,%f0
  934. srl $t2,24,$acc8 !
  935. and $acc6,255,$acc6
  936. ldub [$rounds+$acc5],$acc5
  937. srl $t1,16,$acc9
  938. and $t2,255,$acc7
  939. ldub [$rounds+$acc6],$acc6
  940. ldub [$rounds+$acc7],$acc7
  941. fmovs %f0,%f0
  942. srl $t0,8,$acc10 !
  943. and $acc9,255,$acc9
  944. ldub [$rounds+$acc8],$acc8
  945. srl $t3,24,$acc12
  946. and $acc10,255,$acc10
  947. ldub [$rounds+$acc9],$acc9
  948. srl $t2,16,$acc13
  949. and $t3,255,$acc11
  950. ldub [$rounds+$acc10],$acc10 !
  951. srl $t1,8,$acc14
  952. and $acc13,255,$acc13
  953. ldub [$rounds+$acc11],$acc11
  954. ldub [$rounds+$acc12],$acc12
  955. and $acc14,255,$acc14
  956. ldub [$rounds+$acc13],$acc13
  957. and $t0,255,$acc15
  958. ldub [$rounds+$acc14],$acc14 !
  959. sll $acc0,24,$acc0
  960. xor $acc3,$s0,$s0
  961. ldub [$rounds+$acc15],$acc15
  962. sll $acc1,16,$acc1
  963. xor $acc0,$s0,$s0
  964. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  965. fmovs %f0,%f0
  966. sll $acc2,8,$acc2 !
  967. xor $acc1,$s0,$s0
  968. sll $acc4,24,$acc4
  969. xor $acc2,$s0,$s0
  970. sll $acc5,16,$acc5
  971. xor $acc7,$s1,$s1
  972. sll $acc6,8,$acc6
  973. xor $acc4,$s1,$s1
  974. sll $acc8,24,$acc8 !
  975. xor $acc5,$s1,$s1
  976. sll $acc9,16,$acc9
  977. xor $acc11,$s2,$s2
  978. sll $acc10,8,$acc10
  979. xor $acc6,$s1,$s1
  980. sll $acc12,24,$acc12
  981. xor $acc8,$s2,$s2
  982. sll $acc13,16,$acc13 !
  983. xor $acc9,$s2,$s2
  984. sll $acc14,8,$acc14
  985. xor $acc10,$s2,$s2
  986. xor $acc12,$acc14,$acc14
  987. xor $acc13,$s3,$s3
  988. xor $acc14,$s3,$s3
  989. xor $acc15,$s3,$s3
  990. ret
  991. restore
  992. .type _sparcv9_AES_decrypt,#function
  993. .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
  ! AES_decrypt: public entry point that processes one 16-byte block.
  ! As visible in this routine: the first argument (%o0, seen as %i0
  ! after the register window save) is read as 16 input bytes, the
  ! second argument (%o1, later %i1) receives 16 output bytes, and the
  ! third argument (%i2) is handed to _sparcv9_AES_decrypt in %o5.
  ! NOTE(review): the third argument is presumably the key schedule;
  ! confirm against the callee.
  ! The block travels to and from the callee as four 32-bit words in
  ! %o0-%o3, each holding four consecutive bytes with the first byte in
  ! the most significant position.
  994. .align 32
  995. .globl AES_decrypt
  996. AES_decrypt:
  ! The fast path needs both pointers 4-byte aligned: OR them together
  ! and test the two low bits of the result.
  997. or %o0,%o1,%g1
  998. andcc %g1,3,%g0
  999. bnz,pn %xcc,.Lunaligned_dec
  ! Delay slot of the branch (not annulled), so the new register window
  ! is established on both the aligned and the unaligned path.
  1000. save %sp,-$frame,%sp
  ! Aligned input: fetch the block as four 32-bit words.
  1001. ld [%i0+0],%o0
  1002. ld [%i0+4],%o1
  1003. ld [%i0+8],%o2
  1004. ld [%i0+12],%o3
  ! Pass the third argument through to the callee in %o5.
  1005. mov %i2,%o5
  ! NOTE(review): this nop looks like padding for instruction alignment;
  ! it has no effect on register state.
  1006. nop
  ! call writes its own address (local label 1) into %o7.  The delay
  ! slot subtracts the assemble-time distance (1b - AES_Td) from it,
  ! which leaves the run-time address of AES_Td in %o4.
  1007. 1: call _sparcv9_AES_decrypt
  1008. sub %o7,1b-AES_Td,%o4
  ! Aligned output: store the four result words left in %o0-%o3.
  1009. st %o0,[%i1+0]
  1010. st %o1,[%i1+4]
  1011. st %o2,[%i1+8]
  1012. st %o3,[%i1+12]
  ! Return to the caller; restore runs in the delay slot of ret and
  ! pops the register window.
  1013. ret
  1014. restore
  1015. .align 32
  1016. .Lunaligned_dec:
  ! Slow path, taken when either pointer is not 4-byte aligned.  The
  ! input is read one byte at a time and each word is assembled with
  ! byte 0 shifted to bits 31-24, byte 1 to bits 23-16, byte 2 to bits
  ! 15-8 and byte 3 left in bits 7-0.  Loads, shifts and ORs for
  ! neighbouring words are interleaved.
  1017. ldub [%i0+0],%l0
  1018. ldub [%i0+1],%l1
  1019. ldub [%i0+2],%l2
  1020. sll %l0,24,%l0
  1021. ldub [%i0+3],%l3
  1022. sll %l1,16,%l1
  1023. ldub [%i0+4],%l4
  1024. sll %l2,8,%l2
  1025. or %l1,%l0,%l0
  1026. ldub [%i0+5],%l5
  1027. sll %l4,24,%l4
  1028. or %l3,%l2,%l2
  1029. ldub [%i0+6],%l6
  1030. sll %l5,16,%l5
  ! %o0 = input bytes 0-3.
  1031. or %l0,%l2,%o0
  1032. ldub [%i0+7],%l7
  1033. sll %l6,8,%l6
  1034. or %l5,%l4,%l4
  ! %l0-%l3 are reused from here on for input bytes 8-11.
  1035. ldub [%i0+8],%l0
  1036. or %l7,%l6,%l6
  1037. ldub [%i0+9],%l1
  ! %o1 = input bytes 4-7.
  1038. or %l4,%l6,%o1
  1039. ldub [%i0+10],%l2
  1040. sll %l0,24,%l0
  1041. ldub [%i0+11],%l3
  1042. sll %l1,16,%l1
  ! %l4-%l7 are reused from here on for input bytes 12-15.
  1043. ldub [%i0+12],%l4
  1044. sll %l2,8,%l2
  1045. or %l1,%l0,%l0
  1046. ldub [%i0+13],%l5
  1047. sll %l4,24,%l4
  1048. or %l3,%l2,%l2
  1049. ldub [%i0+14],%l6
  1050. sll %l5,16,%l5
  ! %o2 = input bytes 8-11.
  1051. or %l0,%l2,%o2
  1052. ldub [%i0+15],%l7
  1053. sll %l6,8,%l6
  1054. or %l5,%l4,%l4
  1055. or %l7,%l6,%l6
  ! %o3 = input bytes 12-15.
  1056. or %l4,%l6,%o3
  ! Same calling sequence as on the aligned path: third argument in
  ! %o5, address of AES_Td computed into %o4 in the call delay slot.
  1057. mov %i2,%o5
  1058. nop
  1059. 1: call _sparcv9_AES_decrypt
  1060. sub %o7,1b-AES_Td,%o4
  ! Write the four result words back one byte at a time, most
  ! significant byte first.  stb stores only the low 8 bits of its
  ! source register, so the upper three bytes of each word are shifted
  ! down into a scratch register while the last byte is stored straight
  ! from the result register.
  1061. srl %o0,24,%l0
  1062. srl %o0,16,%l1
  1063. stb %l0,[%i1+0]
  1064. srl %o0,8,%l2
  1065. stb %l1,[%i1+1]
  1066. stb %l2,[%i1+2]
  1067. srl %o1,24,%l4
  1068. stb %o0,[%i1+3]
  1069. srl %o1,16,%l5
  1070. stb %l4,[%i1+4]
  1071. srl %o1,8,%l6
  1072. stb %l5,[%i1+5]
  1073. stb %l6,[%i1+6]
  1074. srl %o2,24,%l0
  1075. stb %o1,[%i1+7]
  1076. srl %o2,16,%l1
  1077. stb %l0,[%i1+8]
  1078. srl %o2,8,%l2
  1079. stb %l1,[%i1+9]
  1080. stb %l2,[%i1+10]
  1081. srl %o3,24,%l4
  1082. stb %o2,[%i1+11]
  1083. srl %o3,16,%l5
  1084. stb %l4,[%i1+12]
  1085. srl %o3,8,%l6
  1086. stb %l5,[%i1+13]
  1087. stb %l6,[%i1+14]
  1088. stb %o3,[%i1+15]
  ! Return to the caller; restore runs in the delay slot of ret.
  1089. ret
  1090. restore
  ! Symbol type and size for the object file symbol table.
  1091. .type AES_decrypt,#function
  1092. .size AES_decrypt,(.-AES_decrypt)
  1093. ___
  # Post-process the generated assembly and write it to STDOUT.
  #
  # The fmovs instructions in the code above stand in for FP nops.  They
  # were originally added to meet specific instruction alignment
  # requirements and so maximize ILP.  UltraSPARC T1, a.k.a. Niagara, has
  # a shared FPU, where FP nops can have an undesired effect, so omit them
  # and sacrifice a fraction of a percent in performance.
  #
  # The substitution deletes everything from the mnemonic to the end of
  # its line: /m lets $ match at every line end and /g repeats the match
  # for every occurrence.  The replacement is a literal empty string, so
  # the /e modifier (evaluate the replacement as Perl code) is not needed.
  $code =~ s/fmovs.*$//gm;
  print $code;
  # Output is buffered, so a failed write (full disk, closed pipe) may only
  # be reported when the handle is closed.  Check it so that a truncated
  # assembly file is never mistaken for a successful run.
  close STDOUT or die "error closing STDOUT: $!";