2
0

aes-sparcv9.pl 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194
  1. #! /usr/bin/env perl
  2. # Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. Rights for redistribution and usage in source and binary
  12. # forms are granted according to the License.
  13. # ====================================================================
  14. #
  15. # Version 1.1
  16. #
  17. # The major reason for undertaking this effort was to mitigate the
  18. # hazard of cache-timing attacks. This is [currently and initially!]
  19. # addressed in two ways. 1. S-boxes are compressed from 5KB to 2KB+256B
  20. # in size each. 2. References to them are scheduled for L2 cache latency,
  21. # meaning that the tables don't have to reside in L1 cache. Once again,
  22. # this is an initial draft and one should expect more countermeasures
  23. # to be implemented...
  24. #
  25. # Version 1.1 prefetches T[ed]4 in order to mitigate attacks on the
  26. # last round.
  27. #
  28. # Even though performance was not the primary goal [on the contrary,
  29. # extra shifts "induced" by the compressed S-box and the longer loop
  30. # epilogue "induced" by scheduling for L2 have a negative effect on
  31. # performance], the code turned out to run in ~23 cycles per processed
  32. # byte en-/decrypted with a 128-bit key. This is a pretty good result for
  33. # code with the mentioned qualities on an UltraSPARC core. Compared to
  34. # Sun C generated code, my encrypt procedure runs just a few percent
  35. # faster, while the decrypt one is a whole 50% faster [yes, Sun C failed
  36. # to generate an optimal decrypt procedure]. Compared to GNU C generated
  37. # code, both procedures are more than 60% faster:-)
  # The last command-line argument, when present and true, names the output
  # file and STDOUT is redirected to it.  The two-argument open is kept on
  # purpose so that existing invocations behave exactly as before; the only
  # change is that a failed open now aborts with a diagnostic instead of
  # being silently ignored.
  if ($output = pop) {
      open(STDOUT, ">$output") or die "can't open $output: $!";
  }
  # Symbols spliced verbatim into the generated assembly.  STACK_FRAME and
  # STACK_BIAS are left symbolic here (presumably resolved through
  # crypto/sparc_arch.h - confirm); $locals is the number of extra bytes
  # reserved on top of the frame, where the return address gets saved.
  ($frame, $bias, $locals) = ("STACK_FRAME", "STACK_BIAS", 16);

  # Sixteen accumulators receiving the table look-ups of one round.
  ($acc0,  $acc1,  $acc2,  $acc3)  = ("%l0", "%o0", "%o1", "%o2");
  ($acc4,  $acc5,  $acc6,  $acc7)  = ("%l1", "%o3", "%o4", "%o5");
  ($acc8,  $acc9,  $acc10, $acc11) = ("%l2", "%o7", "%g1", "%g2");
  ($acc12, $acc13, $acc14, $acc15) = ("%l3", "%g3", "%g4", "%g5");

  # Two banks of four 32-bit words: $t* start out holding round-key words,
  # $s* hold the block being processed (inputs and results of the cores).
  ($t0, $t1, $t2, $t3) = ("%l4", "%l5", "%l6", "%l7");
  ($s0, $s1, $s2, $s3) = ("%i0", "%i1", "%i2", "%i3");

  # Lookup-table base and key-schedule pointer.
  ($tbl, $key) = ("%i4", "%i5");

  $rounds = "%i7"; # aliases with return address, which is off-loaded to stack
  sub _data_word()
  {
      # Append one ".long w,w" line to $code per argument.  Every 32-bit word
      # is written twice, so each table entry is 8 bytes wide, matching the
      # 8-byte-scaled indices and 64-bit loads used by the generated code.
      # Arguments are consumed front to back; the first undefined value (or
      # running out of arguments) ends the loop.
      while (@_) {
          my $word = shift;
          last unless defined $word;
          $code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
      }
  }
  # Start of the generated assembly: make sure __ASSEMBLER__ is defined before
  # crypto/sparc_arch.h is included, declare %g2 and %g3 as scratch registers
  # when __arch64__ is defined, then switch to the text section and open the
  # 256-byte-aligned AES_Te table (its contents are appended right after).
  73. $code.=<<___;
  74. #ifndef __ASSEMBLER__
  75. # define __ASSEMBLER__ 1
  76. #endif
  77. #include "crypto/sparc_arch.h"
  78. #ifdef __arch64__
  79. .register %g2,#scratch
  80. .register %g3,#scratch
  81. #endif
  82. .section ".text",#alloc,#execinstr
  83. .align 256
  84. AES_Te:
  85. ___
  # The 256 32-bit words of the encryption lookup table.  _data_word() writes
  # each word twice, so this part of AES_Te is 256 * 8 = 2048 bytes long.
  86. &_data_word(
  87. 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  88. 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  89. 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  90. 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  91. 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  92. 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  93. 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  94. 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  95. 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  96. 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  97. 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  98. 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  99. 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  100. 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  101. 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  102. 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  103. 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  104. 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  105. 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  106. 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  107. 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  108. 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  109. 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  110. 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  111. 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  112. 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  113. 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  114. 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  115. 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  116. 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  117. 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  118. 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  119. 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  120. 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  121. 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  122. 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  123. 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  124. 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  125. 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  126. 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  127. 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  128. 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  129. 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  130. 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  131. 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  132. 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  133. 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  134. 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  135. 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  136. 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  137. 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  138. 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  139. 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  140. 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  141. 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  142. 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  143. 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  144. 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  145. 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  146. 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  147. 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  148. 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  149. 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  150. 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  # 256-byte substitution table placed directly behind the 2048 bytes of word
  # entries, i.e. at AES_Te+2048.  The encryption core reads it with byte
  # loads in its last round and touches it beforehand ("prefetch te4").  The
  # .type/.size directives then close the AES_Te object.
  151. $code.=<<___;
  152. .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  153. .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  154. .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  155. .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  156. .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  157. .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  158. .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  159. .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  160. .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  161. .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  162. .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  163. .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  164. .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  165. .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  166. .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  167. .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  168. .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  169. .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  170. .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  171. .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  172. .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  173. .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  174. .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  175. .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  176. .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  177. .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  178. .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  179. .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  180. .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  181. .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  182. .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  183. .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  184. .type AES_Te,#object
  185. .size AES_Te,(.-AES_Te)
  ! ----------------------------------------------------------------------
  ! _sparcv9_AES_encrypt: file-local core that encrypts one 16-byte block.
  ! In (passed in the caller's out registers, seen as ins after the save):
  !   %i0-%i3  the four 32-bit words of the input block
  !   %i4      address of AES_Te
  !   %i5      address of the key schedule; the round count is read from
  !            byte offset 240, round-key words from offset 0 upwards
  ! Out:
  !   %i0-%i3  the result words, visible to the caller as %o0-%o3
  ! %i7 is reused as the round counter, so the return address is parked in
  ! the stack frame on entry and reloaded before returning.
  ! The loop body performs two rounds per iteration, hence the round count
  ! is halved up front.
  ! ----------------------------------------------------------------------
  186. .align 64
  187. .skip 16
  188. _sparcv9_AES_encrypt:
  189. save %sp,-$frame-$locals,%sp
  190. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  ! round count, then the first four round-key words
  191. ld [$key+240],$rounds
  192. ld [$key+0],$t0
  193. ld [$key+4],$t1 !
  194. ld [$key+8],$t2
  ! halve the count: one loop iteration covers two rounds
  195. srl $rounds,1,$rounds
  ! XOR the first round key into the block while preloading the next key
  196. xor $t0,$s0,$s0
  197. ld [$key+12],$t3
  198. srl $s0,21,$acc0
  199. xor $t1,$s1,$s1
  200. ld [$key+16],$t0
  201. srl $s1,13,$acc1 !
  202. xor $t2,$s2,$s2
  203. ld [$key+20],$t1
  204. xor $t3,$s3,$s3
  205. ld [$key+24],$t2
  206. and $acc0,2040,$acc0
  207. ld [$key+28],$t3
  208. nop
  ! Table indexing used throughout the loop: shifting right by 21, 13 or 5,
  ! or left by 3, and masking with 2040 (255*8) turns byte 3, 2, 1 or 0 of
  ! a word into a byte offset of an 8-byte table entry, which is then
  ! fetched with a 64-bit load.
  209. .Lenc_loop:
  210. srl $s2,5,$acc2 !
  211. and $acc1,2040,$acc1
  212. ldx [$tbl+$acc0],$acc0
  213. sll $s3,3,$acc3
  214. and $acc2,2040,$acc2
  215. ldx [$tbl+$acc1],$acc1
  216. srl $s1,21,$acc4
  217. and $acc3,2040,$acc3
  218. ldx [$tbl+$acc2],$acc2 !
  219. srl $s2,13,$acc5
  220. and $acc4,2040,$acc4
  221. ldx [$tbl+$acc3],$acc3
  222. srl $s3,5,$acc6
  223. and $acc5,2040,$acc5
  224. ldx [$tbl+$acc4],$acc4
  225. fmovs %f0,%f0
  226. sll $s0,3,$acc7 !
  227. and $acc6,2040,$acc6
  228. ldx [$tbl+$acc5],$acc5
  229. srl $s2,21,$acc8
  230. and $acc7,2040,$acc7
  231. ldx [$tbl+$acc6],$acc6
  232. srl $s3,13,$acc9
  233. and $acc8,2040,$acc8
  234. ldx [$tbl+$acc7],$acc7 !
  235. srl $s0,5,$acc10
  236. and $acc9,2040,$acc9
  237. ldx [$tbl+$acc8],$acc8
  238. sll $s1,3,$acc11
  239. and $acc10,2040,$acc10
  240. ldx [$tbl+$acc9],$acc9
  241. fmovs %f0,%f0
  242. srl $s3,21,$acc12 !
  243. and $acc11,2040,$acc11
  244. ldx [$tbl+$acc10],$acc10
  245. srl $s0,13,$acc13
  246. and $acc12,2040,$acc12
  247. ldx [$tbl+$acc11],$acc11
  248. srl $s1,5,$acc14
  249. and $acc13,2040,$acc13
  250. ldx [$tbl+$acc12],$acc12 !
  251. sll $s2,3,$acc15
  252. and $acc14,2040,$acc14
  253. ldx [$tbl+$acc13],$acc13
  254. and $acc15,2040,$acc15
  ! step the key pointer past the two round keys consumed per iteration
  255. add $key,32,$key
  256. ldx [$tbl+$acc14],$acc14
  257. fmovs %f0,%f0
  258. subcc $rounds,1,$rounds !
  259. ldx [$tbl+$acc15],$acc15
  ! Annulled branch: the add in the delay slot runs only when the branch is
  ! taken, turning the exhausted counter register into a pointer to the
  ! byte table that sits 2048 bytes into AES_Te.
  260. bz,a,pn %icc,.Lenc_last
  261. add $tbl,2048,$rounds
  ! First round of the pair.  Every table word is stored twice, so the low
  ! 32 bits of a 64-bit entry shifted right by 8, 16 or 24 equal the word
  ! rotated right by that amount; the four variants are XORed on top of
  ! the round-key word already sitting in the temporaries.
  262. srlx $acc1,8,$acc1
  263. xor $acc0,$t0,$t0
  264. ld [$key+0],$s0
  265. fmovs %f0,%f0
  266. srlx $acc2,16,$acc2 !
  267. xor $acc1,$t0,$t0
  268. ld [$key+4],$s1
  269. srlx $acc3,24,$acc3
  270. xor $acc2,$t0,$t0
  271. ld [$key+8],$s2
  272. srlx $acc5,8,$acc5
  273. xor $acc3,$t0,$t0
  274. ld [$key+12],$s3 !
  275. srlx $acc6,16,$acc6
  276. xor $acc4,$t1,$t1
  277. fmovs %f0,%f0
  278. srlx $acc7,24,$acc7
  279. xor $acc5,$t1,$t1
  280. srlx $acc9,8,$acc9
  281. xor $acc6,$t1,$t1
  282. srlx $acc10,16,$acc10 !
  283. xor $acc7,$t1,$t1
  284. srlx $acc11,24,$acc11
  285. xor $acc8,$t2,$t2
  286. srlx $acc13,8,$acc13
  287. xor $acc9,$t2,$t2
  288. srlx $acc14,16,$acc14
  289. xor $acc10,$t2,$t2
  290. srlx $acc15,24,$acc15 !
  291. xor $acc11,$t2,$t2
  292. xor $acc12,$acc14,$acc14
  293. xor $acc13,$t3,$t3
  ! Second round of the pair: same look-ups, now indexed by the temporaries,
  ! with the results folded back into the state registers.
  294. srl $t0,21,$acc0
  295. xor $acc14,$t3,$t3
  296. srl $t1,13,$acc1
  297. xor $acc15,$t3,$t3
  298. and $acc0,2040,$acc0 !
  299. srl $t2,5,$acc2
  300. and $acc1,2040,$acc1
  301. ldx [$tbl+$acc0],$acc0
  302. sll $t3,3,$acc3
  303. and $acc2,2040,$acc2
  304. ldx [$tbl+$acc1],$acc1
  305. fmovs %f0,%f0
  306. srl $t1,21,$acc4 !
  307. and $acc3,2040,$acc3
  308. ldx [$tbl+$acc2],$acc2
  309. srl $t2,13,$acc5
  310. and $acc4,2040,$acc4
  311. ldx [$tbl+$acc3],$acc3
  312. srl $t3,5,$acc6
  313. and $acc5,2040,$acc5
  314. ldx [$tbl+$acc4],$acc4 !
  315. sll $t0,3,$acc7
  316. and $acc6,2040,$acc6
  317. ldx [$tbl+$acc5],$acc5
  318. srl $t2,21,$acc8
  319. and $acc7,2040,$acc7
  320. ldx [$tbl+$acc6],$acc6
  321. fmovs %f0,%f0
  322. srl $t3,13,$acc9 !
  323. and $acc8,2040,$acc8
  324. ldx [$tbl+$acc7],$acc7
  325. srl $t0,5,$acc10
  326. and $acc9,2040,$acc9
  327. ldx [$tbl+$acc8],$acc8
  328. sll $t1,3,$acc11
  329. and $acc10,2040,$acc10
  330. ldx [$tbl+$acc9],$acc9 !
  331. srl $t3,21,$acc12
  332. and $acc11,2040,$acc11
  333. ldx [$tbl+$acc10],$acc10
  334. srl $t0,13,$acc13
  335. and $acc12,2040,$acc12
  336. ldx [$tbl+$acc11],$acc11
  337. fmovs %f0,%f0
  338. srl $t1,5,$acc14 !
  339. and $acc13,2040,$acc13
  340. ldx [$tbl+$acc12],$acc12
  341. sll $t2,3,$acc15
  342. and $acc14,2040,$acc14
  343. ldx [$tbl+$acc13],$acc13
  344. srlx $acc1,8,$acc1
  345. and $acc15,2040,$acc15
  346. ldx [$tbl+$acc14],$acc14 !
  347. srlx $acc2,16,$acc2
  348. xor $acc0,$s0,$s0
  349. ldx [$tbl+$acc15],$acc15
  350. srlx $acc3,24,$acc3
  351. xor $acc1,$s0,$s0
  352. ld [$key+16],$t0
  353. fmovs %f0,%f0
  354. srlx $acc5,8,$acc5 !
  355. xor $acc2,$s0,$s0
  356. ld [$key+20],$t1
  357. srlx $acc6,16,$acc6
  358. xor $acc3,$s0,$s0
  359. ld [$key+24],$t2
  360. srlx $acc7,24,$acc7
  361. xor $acc4,$s1,$s1
  362. ld [$key+28],$t3 !
  ! The loads into %g0 below discard their result; they touch the byte
  ! table at 32-byte steps across its 256 bytes on every iteration.
  363. srlx $acc9,8,$acc9
  364. xor $acc5,$s1,$s1
  365. ldx [$tbl+2048+0],%g0 ! prefetch te4
  366. srlx $acc10,16,$acc10
  367. xor $acc6,$s1,$s1
  368. ldx [$tbl+2048+32],%g0 ! prefetch te4
  369. srlx $acc11,24,$acc11
  370. xor $acc7,$s1,$s1
  371. ldx [$tbl+2048+64],%g0 ! prefetch te4
  372. srlx $acc13,8,$acc13
  373. xor $acc8,$s2,$s2
  374. ldx [$tbl+2048+96],%g0 ! prefetch te4
  375. srlx $acc14,16,$acc14 !
  376. xor $acc9,$s2,$s2
  377. ldx [$tbl+2048+128],%g0 ! prefetch te4
  378. srlx $acc15,24,$acc15
  379. xor $acc10,$s2,$s2
  380. ldx [$tbl+2048+160],%g0 ! prefetch te4
  381. srl $s0,21,$acc0
  382. xor $acc11,$s2,$s2
  383. ldx [$tbl+2048+192],%g0 ! prefetch te4
  384. xor $acc12,$acc14,$acc14
  385. xor $acc13,$s3,$s3
  386. ldx [$tbl+2048+224],%g0 ! prefetch te4
  387. srl $s1,13,$acc1 !
  388. xor $acc14,$s3,$s3
  389. xor $acc15,$s3,$s3
  ! unconditional branch back; the and in its delay slot always executes
  390. ba .Lenc_loop
  391. and $acc0,2040,$acc0
  392. .align 32
  ! Last iteration: finish the pending round exactly as in the loop, then
  ! do the final round with single-byte look-ups through the pointer that
  ! the annulled delay slot left in the counter register.  The last round
  ! key is loaded straight into the state registers and the looked-up
  ! bytes, shifted into position, are XORed on top of it.
  393. .Lenc_last:
  394. srlx $acc1,8,$acc1 !
  395. xor $acc0,$t0,$t0
  396. ld [$key+0],$s0
  397. srlx $acc2,16,$acc2
  398. xor $acc1,$t0,$t0
  399. ld [$key+4],$s1
  400. srlx $acc3,24,$acc3
  401. xor $acc2,$t0,$t0
  402. ld [$key+8],$s2 !
  403. srlx $acc5,8,$acc5
  404. xor $acc3,$t0,$t0
  405. ld [$key+12],$s3
  406. srlx $acc6,16,$acc6
  407. xor $acc4,$t1,$t1
  408. srlx $acc7,24,$acc7
  409. xor $acc5,$t1,$t1
  410. srlx $acc9,8,$acc9 !
  411. xor $acc6,$t1,$t1
  412. srlx $acc10,16,$acc10
  413. xor $acc7,$t1,$t1
  414. srlx $acc11,24,$acc11
  415. xor $acc8,$t2,$t2
  416. srlx $acc13,8,$acc13
  417. xor $acc9,$t2,$t2
  418. srlx $acc14,16,$acc14 !
  419. xor $acc10,$t2,$t2
  420. srlx $acc15,24,$acc15
  421. xor $acc11,$t2,$t2
  422. xor $acc12,$acc14,$acc14
  423. xor $acc13,$t3,$t3
  ! byte-table indices are plain byte values here (mask 255, no scaling)
  424. srl $t0,24,$acc0
  425. xor $acc14,$t3,$t3
  426. srl $t1,16,$acc1 !
  427. xor $acc15,$t3,$t3
  428. srl $t2,8,$acc2
  429. and $acc1,255,$acc1
  430. ldub [$rounds+$acc0],$acc0
  431. srl $t1,24,$acc4
  432. and $acc2,255,$acc2
  433. ldub [$rounds+$acc1],$acc1
  434. srl $t2,16,$acc5 !
  435. and $t3,255,$acc3
  436. ldub [$rounds+$acc2],$acc2
  437. ldub [$rounds+$acc3],$acc3
  438. srl $t3,8,$acc6
  439. and $acc5,255,$acc5
  440. ldub [$rounds+$acc4],$acc4
  441. fmovs %f0,%f0
  442. srl $t2,24,$acc8 !
  443. and $acc6,255,$acc6
  444. ldub [$rounds+$acc5],$acc5
  445. srl $t3,16,$acc9
  446. and $t0,255,$acc7
  447. ldub [$rounds+$acc6],$acc6
  448. ldub [$rounds+$acc7],$acc7
  449. fmovs %f0,%f0
  450. srl $t0,8,$acc10 !
  451. and $acc9,255,$acc9
  452. ldub [$rounds+$acc8],$acc8
  453. srl $t3,24,$acc12
  454. and $acc10,255,$acc10
  455. ldub [$rounds+$acc9],$acc9
  456. srl $t0,16,$acc13
  457. and $t1,255,$acc11
  458. ldub [$rounds+$acc10],$acc10 !
  459. srl $t1,8,$acc14
  460. and $acc13,255,$acc13
  461. ldub [$rounds+$acc11],$acc11
  462. ldub [$rounds+$acc12],$acc12
  463. and $acc14,255,$acc14
  464. ldub [$rounds+$acc13],$acc13
  465. and $t2,255,$acc15
  466. ldub [$rounds+$acc14],$acc14 !
  467. sll $acc0,24,$acc0
  468. xor $acc3,$s0,$s0
  469. ldub [$rounds+$acc15],$acc15
  470. sll $acc1,16,$acc1
  471. xor $acc0,$s0,$s0
  472. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  473. fmovs %f0,%f0
  474. sll $acc2,8,$acc2 !
  475. xor $acc1,$s0,$s0
  476. sll $acc4,24,$acc4
  477. xor $acc2,$s0,$s0
  478. sll $acc5,16,$acc5
  479. xor $acc7,$s1,$s1
  480. sll $acc6,8,$acc6
  481. xor $acc4,$s1,$s1
  482. sll $acc8,24,$acc8 !
  483. xor $acc5,$s1,$s1
  484. sll $acc9,16,$acc9
  485. xor $acc11,$s2,$s2
  486. sll $acc10,8,$acc10
  487. xor $acc6,$s1,$s1
  488. sll $acc12,24,$acc12
  489. xor $acc8,$s2,$s2
  490. sll $acc13,16,$acc13 !
  491. xor $acc9,$s2,$s2
  492. sll $acc14,8,$acc14
  493. xor $acc10,$s2,$s2
  494. xor $acc12,$acc14,$acc14
  495. xor $acc13,$s3,$s3
  496. xor $acc14,$s3,$s3
  497. xor $acc15,$s3,$s3
  ! restore in the delay slot pops the register window, so the result in
  ! %i0-%i3 shows up in the caller as %o0-%o3
  498. ret
  499. restore
  500. .type _sparcv9_AES_encrypt,#function
  501. .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
  ! ----------------------------------------------------------------------
  ! AES_encrypt: exported entry point that encrypts one 16-byte block.
  ! In:  %o0 = address of the 16-byte input block
  !      %o1 = address of the 16-byte output block
  !      %o2 = address of the key schedule (handed to the core in %o5)
  ! When both block addresses are 4-byte aligned the block is moved with
  ! word loads and stores; otherwise the unaligned path assembles and
  ! stores the words byte by byte, most significant byte first.
  ! NOTE(review): presumably the assembly body of the C function
  ! AES_encrypt(in, out, key) - confirm against the C prototype.
  ! ----------------------------------------------------------------------
  502. .align 32
  503. .globl AES_encrypt
  504. AES_encrypt:
  ! the low two bits of (in | out) are non-zero if either is misaligned
  505. or %o0,%o1,%g1
  506. andcc %g1,3,%g0
  ! the save sits in the delay slot of a non-annulled branch, so the new
  ! register window is opened on both paths
  507. bnz,pn %xcc,.Lunaligned_enc
  508. save %sp,-$frame,%sp
  509. ld [%i0+0],%o0
  510. ld [%i0+4],%o1
  511. ld [%i0+8],%o2
  512. ld [%i0+12],%o3
  ! Position-independent address of AES_Te: the call leaves its own address
  ! (label 1) in %o7 and simply continues, the add in its delay slot then
  ! applies the assembly-time distance from label 1 to the table.
  513. 1: call .+8
  514. add %o7,AES_Te-1b,%o4
  ! the key pointer is passed in the delay slot of the call
  515. call _sparcv9_AES_encrypt
  516. mov %i2,%o5
  517. st %o0,[%i1+0]
  518. st %o1,[%i1+4]
  519. st %o2,[%i1+8]
  520. st %o3,[%i1+12]
  521. ret
  522. restore
  523. .align 32
  ! Unaligned path: build each 32-bit word from four byte loads
  ! (byte0 << 24 | byte1 << 16 | byte2 << 8 | byte3).
  524. .Lunaligned_enc:
  525. ldub [%i0+0],%l0
  526. ldub [%i0+1],%l1
  527. ldub [%i0+2],%l2
  528. sll %l0,24,%l0
  529. ldub [%i0+3],%l3
  530. sll %l1,16,%l1
  531. ldub [%i0+4],%l4
  532. sll %l2,8,%l2
  533. or %l1,%l0,%l0
  534. ldub [%i0+5],%l5
  535. sll %l4,24,%l4
  536. or %l3,%l2,%l2
  537. ldub [%i0+6],%l6
  538. sll %l5,16,%l5
  539. or %l0,%l2,%o0
  540. ldub [%i0+7],%l7
  541. sll %l6,8,%l6
  542. or %l5,%l4,%l4
  543. ldub [%i0+8],%l0
  544. or %l7,%l6,%l6
  545. ldub [%i0+9],%l1
  546. or %l4,%l6,%o1
  547. ldub [%i0+10],%l2
  548. sll %l0,24,%l0
  549. ldub [%i0+11],%l3
  550. sll %l1,16,%l1
  551. ldub [%i0+12],%l4
  552. sll %l2,8,%l2
  553. or %l1,%l0,%l0
  554. ldub [%i0+13],%l5
  555. sll %l4,24,%l4
  556. or %l3,%l2,%l2
  557. ldub [%i0+14],%l6
  558. sll %l5,16,%l5
  559. or %l0,%l2,%o2
  560. ldub [%i0+15],%l7
  561. sll %l6,8,%l6
  562. or %l5,%l4,%l4
  563. or %l7,%l6,%l6
  564. or %l4,%l6,%o3
  ! same table-address computation and core call as on the aligned path
  565. 1: call .+8
  566. add %o7,AES_Te-1b,%o4
  567. call _sparcv9_AES_encrypt
  568. mov %i2,%o5
  ! write the four result words back one byte at a time; a byte store
  ! keeps only the low 8 bits, so the unshifted word supplies byte 3
  569. srl %o0,24,%l0
  570. srl %o0,16,%l1
  571. stb %l0,[%i1+0]
  572. srl %o0,8,%l2
  573. stb %l1,[%i1+1]
  574. stb %l2,[%i1+2]
  575. srl %o1,24,%l4
  576. stb %o0,[%i1+3]
  577. srl %o1,16,%l5
  578. stb %l4,[%i1+4]
  579. srl %o1,8,%l6
  580. stb %l5,[%i1+5]
  581. stb %l6,[%i1+6]
  582. srl %o2,24,%l0
  583. stb %o1,[%i1+7]
  584. srl %o2,16,%l1
  585. stb %l0,[%i1+8]
  586. srl %o2,8,%l2
  587. stb %l1,[%i1+9]
  588. stb %l2,[%i1+10]
  589. srl %o3,24,%l4
  590. stb %o2,[%i1+11]
  591. srl %o3,16,%l5
  592. stb %l4,[%i1+12]
  593. srl %o3,8,%l6
  594. stb %l5,[%i1+13]
  595. stb %l6,[%i1+14]
  596. stb %o3,[%i1+15]
  597. ret
  598. restore
  599. .type AES_encrypt,#function
  600. .size AES_encrypt,(.-AES_encrypt)
  601. ___
  # Open the 256-byte-aligned AES_Td table; its contents are appended by the
  # statements that follow.
  602. $code.=<<___;
  603. .align 256
  604. AES_Td:
  605. ___
  # The 256 32-bit words of the decryption lookup table.  As with AES_Te,
  # _data_word() writes each word twice, giving 256 * 8 = 2048 bytes.
  606. &_data_word(
  607. 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  608. 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  609. 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  610. 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  611. 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  612. 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  613. 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  614. 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  615. 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  616. 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  617. 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  618. 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  619. 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  620. 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  621. 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  622. 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  623. 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  624. 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  625. 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  626. 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  627. 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  628. 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  629. 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  630. 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  631. 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  632. 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  633. 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  634. 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  635. 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  636. 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  637. 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  638. 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  639. 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  640. 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  641. 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  642. 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  643. 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  644. 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  645. 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  646. 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  647. 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  648. 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  649. 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  650. 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  651. 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  652. 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  653. 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  654. 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  655. 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  656. 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  657. 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  658. 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  659. 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  660. 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  661. 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  662. 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  663. 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  664. 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  665. 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  666. 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  667. 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  668. 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  669. 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  670. 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  # 256-byte table placed directly behind the 2048 bytes of word entries,
  # i.e. at AES_Td+2048; the decryption code refers to it as "td4" when it
  # prefetches it.  The .type/.size directives then close the AES_Td object.
  # The same here-document goes on to emit the decryption routine.
  671. $code.=<<___;
  672. .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  673. .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  674. .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  675. .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  676. .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  677. .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  678. .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  679. .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  680. .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  681. .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  682. .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  683. .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  684. .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  685. .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  686. .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  687. .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  688. .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  689. .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  690. .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  691. .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  692. .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  693. .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  694. .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  695. .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  696. .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  697. .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  698. .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  699. .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  700. .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  701. .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  702. .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  703. .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  704. .type AES_Td,#object
  705. .size AES_Td,(.-AES_Td)
  706. .align 64
  707. .skip 16
  708. _sparcv9_AES_decrypt:
  709. save %sp,-$frame-$locals,%sp
  710. stx %i7,[%sp+$bias+$frame+0] ! off-load return address
  711. ld [$key+240],$rounds
  712. ld [$key+0],$t0
  713. ld [$key+4],$t1 !
  714. ld [$key+8],$t2
  715. ld [$key+12],$t3
  716. srl $rounds,1,$rounds
  717. xor $t0,$s0,$s0
  718. ld [$key+16],$t0
  719. xor $t1,$s1,$s1
  720. ld [$key+20],$t1
  721. srl $s0,21,$acc0 !
  722. xor $t2,$s2,$s2
  723. ld [$key+24],$t2
  724. xor $t3,$s3,$s3
  725. and $acc0,2040,$acc0
  726. ld [$key+28],$t3
  727. srl $s3,13,$acc1
  728. nop
  729. .Ldec_loop:
  730. srl $s2,5,$acc2 !
  731. and $acc1,2040,$acc1
  732. ldx [$tbl+$acc0],$acc0
  733. sll $s1,3,$acc3
  734. and $acc2,2040,$acc2
  735. ldx [$tbl+$acc1],$acc1
  736. srl $s1,21,$acc4
  737. and $acc3,2040,$acc3
  738. ldx [$tbl+$acc2],$acc2 !
  739. srl $s0,13,$acc5
  740. and $acc4,2040,$acc4
  741. ldx [$tbl+$acc3],$acc3
  742. srl $s3,5,$acc6
  743. and $acc5,2040,$acc5
  744. ldx [$tbl+$acc4],$acc4
  745. fmovs %f0,%f0
  746. sll $s2,3,$acc7 !
  747. and $acc6,2040,$acc6
  748. ldx [$tbl+$acc5],$acc5
  749. srl $s2,21,$acc8
  750. and $acc7,2040,$acc7
  751. ldx [$tbl+$acc6],$acc6
  752. srl $s1,13,$acc9
  753. and $acc8,2040,$acc8
  754. ldx [$tbl+$acc7],$acc7 !
  755. srl $s0,5,$acc10
  756. and $acc9,2040,$acc9
  757. ldx [$tbl+$acc8],$acc8
  758. sll $s3,3,$acc11
  759. and $acc10,2040,$acc10
  760. ldx [$tbl+$acc9],$acc9
  761. fmovs %f0,%f0
  762. srl $s3,21,$acc12 !
  763. and $acc11,2040,$acc11
  764. ldx [$tbl+$acc10],$acc10
  765. srl $s2,13,$acc13
  766. and $acc12,2040,$acc12
  767. ldx [$tbl+$acc11],$acc11
  768. srl $s1,5,$acc14
  769. and $acc13,2040,$acc13
  770. ldx [$tbl+$acc12],$acc12 !
  771. sll $s0,3,$acc15
  772. and $acc14,2040,$acc14
  773. ldx [$tbl+$acc13],$acc13
  774. and $acc15,2040,$acc15
  775. add $key,32,$key
  776. ldx [$tbl+$acc14],$acc14
  777. fmovs %f0,%f0
  778. subcc $rounds,1,$rounds !
  779. ldx [$tbl+$acc15],$acc15
  780. bz,a,pn %icc,.Ldec_last
  781. add $tbl,2048,$rounds
  782. srlx $acc1,8,$acc1
  783. xor $acc0,$t0,$t0
  784. ld [$key+0],$s0
  785. fmovs %f0,%f0
  786. srlx $acc2,16,$acc2 !
  787. xor $acc1,$t0,$t0
  788. ld [$key+4],$s1
  789. srlx $acc3,24,$acc3
  790. xor $acc2,$t0,$t0
  791. ld [$key+8],$s2
  792. srlx $acc5,8,$acc5
  793. xor $acc3,$t0,$t0
  794. ld [$key+12],$s3 !
  795. srlx $acc6,16,$acc6
  796. xor $acc4,$t1,$t1
  797. fmovs %f0,%f0
  798. srlx $acc7,24,$acc7
  799. xor $acc5,$t1,$t1
  800. srlx $acc9,8,$acc9
  801. xor $acc6,$t1,$t1
  802. srlx $acc10,16,$acc10 !
  803. xor $acc7,$t1,$t1
  804. srlx $acc11,24,$acc11
  805. xor $acc8,$t2,$t2
  806. srlx $acc13,8,$acc13
  807. xor $acc9,$t2,$t2
  808. srlx $acc14,16,$acc14
  809. xor $acc10,$t2,$t2
  810. srlx $acc15,24,$acc15 !
  811. xor $acc11,$t2,$t2
  812. xor $acc12,$acc14,$acc14
  813. xor $acc13,$t3,$t3
  814. srl $t0,21,$acc0
  815. xor $acc14,$t3,$t3
  816. xor $acc15,$t3,$t3
  817. srl $t3,13,$acc1
  818. and $acc0,2040,$acc0 !
  819. srl $t2,5,$acc2
  820. and $acc1,2040,$acc1
  821. ldx [$tbl+$acc0],$acc0
  822. sll $t1,3,$acc3
  823. and $acc2,2040,$acc2
  824. ldx [$tbl+$acc1],$acc1
  825. fmovs %f0,%f0
  826. srl $t1,21,$acc4 !
  827. and $acc3,2040,$acc3
  828. ldx [$tbl+$acc2],$acc2
  829. srl $t0,13,$acc5
  830. and $acc4,2040,$acc4
  831. ldx [$tbl+$acc3],$acc3
  832. srl $t3,5,$acc6
  833. and $acc5,2040,$acc5
  834. ldx [$tbl+$acc4],$acc4 !
  835. sll $t2,3,$acc7
  836. and $acc6,2040,$acc6
  837. ldx [$tbl+$acc5],$acc5
  838. srl $t2,21,$acc8
  839. and $acc7,2040,$acc7
  840. ldx [$tbl+$acc6],$acc6
  841. fmovs %f0,%f0
  842. srl $t1,13,$acc9 !
  843. and $acc8,2040,$acc8
  844. ldx [$tbl+$acc7],$acc7
  845. srl $t0,5,$acc10
  846. and $acc9,2040,$acc9
  847. ldx [$tbl+$acc8],$acc8
  848. sll $t3,3,$acc11
  849. and $acc10,2040,$acc10
  850. ldx [$tbl+$acc9],$acc9 !
  851. srl $t3,21,$acc12
  852. and $acc11,2040,$acc11
  853. ldx [$tbl+$acc10],$acc10
  854. srl $t2,13,$acc13
  855. and $acc12,2040,$acc12
  856. ldx [$tbl+$acc11],$acc11
  857. fmovs %f0,%f0
  858. srl $t1,5,$acc14 !
  859. and $acc13,2040,$acc13
  860. ldx [$tbl+$acc12],$acc12
  861. sll $t0,3,$acc15
  862. and $acc14,2040,$acc14
  863. ldx [$tbl+$acc13],$acc13
  864. srlx $acc1,8,$acc1
  865. and $acc15,2040,$acc15
  866. ldx [$tbl+$acc14],$acc14 !
  867. srlx $acc2,16,$acc2
  868. xor $acc0,$s0,$s0
  869. ldx [$tbl+$acc15],$acc15
  870. srlx $acc3,24,$acc3
  871. xor $acc1,$s0,$s0
  872. ld [$key+16],$t0
  873. fmovs %f0,%f0
  874. srlx $acc5,8,$acc5 !
  875. xor $acc2,$s0,$s0
  876. ld [$key+20],$t1
  877. srlx $acc6,16,$acc6
  878. xor $acc3,$s0,$s0
  879. ld [$key+24],$t2
  880. srlx $acc7,24,$acc7
  881. xor $acc4,$s1,$s1
  882. ld [$key+28],$t3 !
  883. srlx $acc9,8,$acc9
  884. xor $acc5,$s1,$s1
  885. ldx [$tbl+2048+0],%g0 ! prefetch td4
  886. srlx $acc10,16,$acc10
  887. xor $acc6,$s1,$s1
  888. ldx [$tbl+2048+32],%g0 ! prefetch td4
  889. srlx $acc11,24,$acc11
  890. xor $acc7,$s1,$s1
  891. ldx [$tbl+2048+64],%g0 ! prefetch td4
  892. srlx $acc13,8,$acc13
  893. xor $acc8,$s2,$s2
  894. ldx [$tbl+2048+96],%g0 ! prefetch td4
  895. srlx $acc14,16,$acc14 !
  896. xor $acc9,$s2,$s2
  897. ldx [$tbl+2048+128],%g0 ! prefetch td4
  898. srlx $acc15,24,$acc15
  899. xor $acc10,$s2,$s2
  900. ldx [$tbl+2048+160],%g0 ! prefetch td4
  901. srl $s0,21,$acc0
  902. xor $acc11,$s2,$s2
  903. ldx [$tbl+2048+192],%g0 ! prefetch td4
  904. xor $acc12,$acc14,$acc14
  905. xor $acc13,$s3,$s3
  906. ldx [$tbl+2048+224],%g0 ! prefetch td4
  907. and $acc0,2040,$acc0 !
  908. xor $acc14,$s3,$s3
  909. xor $acc15,$s3,$s3
  910. ba .Ldec_loop
  911. srl $s3,13,$acc1
  912. .align 32
  913. .Ldec_last:
  914. srlx $acc1,8,$acc1 !
  915. xor $acc0,$t0,$t0
  916. ld [$key+0],$s0
  917. srlx $acc2,16,$acc2
  918. xor $acc1,$t0,$t0
  919. ld [$key+4],$s1
  920. srlx $acc3,24,$acc3
  921. xor $acc2,$t0,$t0
  922. ld [$key+8],$s2 !
  923. srlx $acc5,8,$acc5
  924. xor $acc3,$t0,$t0
  925. ld [$key+12],$s3
  926. srlx $acc6,16,$acc6
  927. xor $acc4,$t1,$t1
  928. srlx $acc7,24,$acc7
  929. xor $acc5,$t1,$t1
  930. srlx $acc9,8,$acc9 !
  931. xor $acc6,$t1,$t1
  932. srlx $acc10,16,$acc10
  933. xor $acc7,$t1,$t1
  934. srlx $acc11,24,$acc11
  935. xor $acc8,$t2,$t2
  936. srlx $acc13,8,$acc13
  937. xor $acc9,$t2,$t2
  938. srlx $acc14,16,$acc14 !
  939. xor $acc10,$t2,$t2
  940. srlx $acc15,24,$acc15
  941. xor $acc11,$t2,$t2
  942. xor $acc12,$acc14,$acc14
  943. xor $acc13,$t3,$t3
  944. srl $t0,24,$acc0
  945. xor $acc14,$t3,$t3
  946. xor $acc15,$t3,$t3 !
  947. srl $t3,16,$acc1
  948. srl $t2,8,$acc2
  949. and $acc1,255,$acc1
  950. ldub [$rounds+$acc0],$acc0
  951. srl $t1,24,$acc4
  952. and $acc2,255,$acc2
  953. ldub [$rounds+$acc1],$acc1
  954. srl $t0,16,$acc5 !
  955. and $t1,255,$acc3
  956. ldub [$rounds+$acc2],$acc2
  957. ldub [$rounds+$acc3],$acc3
  958. srl $t3,8,$acc6
  959. and $acc5,255,$acc5
  960. ldub [$rounds+$acc4],$acc4
  961. fmovs %f0,%f0
  962. srl $t2,24,$acc8 !
  963. and $acc6,255,$acc6
  964. ldub [$rounds+$acc5],$acc5
  965. srl $t1,16,$acc9
  966. and $t2,255,$acc7
  967. ldub [$rounds+$acc6],$acc6
  968. ldub [$rounds+$acc7],$acc7
  969. fmovs %f0,%f0
  970. srl $t0,8,$acc10 !
  971. and $acc9,255,$acc9
  972. ldub [$rounds+$acc8],$acc8
  973. srl $t3,24,$acc12
  974. and $acc10,255,$acc10
  975. ldub [$rounds+$acc9],$acc9
  976. srl $t2,16,$acc13
  977. and $t3,255,$acc11
  978. ldub [$rounds+$acc10],$acc10 !
  979. srl $t1,8,$acc14
  980. and $acc13,255,$acc13
  981. ldub [$rounds+$acc11],$acc11
  982. ldub [$rounds+$acc12],$acc12
  983. and $acc14,255,$acc14
  984. ldub [$rounds+$acc13],$acc13
  985. and $t0,255,$acc15
  986. ldub [$rounds+$acc14],$acc14 !
  987. sll $acc0,24,$acc0
  988. xor $acc3,$s0,$s0
  989. ldub [$rounds+$acc15],$acc15
  990. sll $acc1,16,$acc1
  991. xor $acc0,$s0,$s0
  992. ldx [%sp+$bias+$frame+0],%i7 ! restore return address
  993. fmovs %f0,%f0
  994. sll $acc2,8,$acc2 !
  995. xor $acc1,$s0,$s0
  996. sll $acc4,24,$acc4
  997. xor $acc2,$s0,$s0
  998. sll $acc5,16,$acc5
  999. xor $acc7,$s1,$s1
  1000. sll $acc6,8,$acc6
  1001. xor $acc4,$s1,$s1
  1002. sll $acc8,24,$acc8 !
  1003. xor $acc5,$s1,$s1
  1004. sll $acc9,16,$acc9
  1005. xor $acc11,$s2,$s2
  1006. sll $acc10,8,$acc10
  1007. xor $acc6,$s1,$s1
  1008. sll $acc12,24,$acc12
  1009. xor $acc8,$s2,$s2
  1010. sll $acc13,16,$acc13 !
  1011. xor $acc9,$s2,$s2
  1012. sll $acc14,8,$acc14
  1013. xor $acc10,$s2,$s2
  1014. xor $acc12,$acc14,$acc14
  1015. xor $acc13,$s3,$s3
  1016. xor $acc14,$s3,$s3
  1017. xor $acc15,$s3,$s3
  1018. ret
  1019. restore
  1020. .type _sparcv9_AES_decrypt,#function
  1021. .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
  ! AES_decrypt: global entry point that runs _sparcv9_AES_decrypt on one
  ! 16-byte block.
  !   %o0 - address of the 16 input bytes (read only)
  !   %o1 - address of the 16 output bytes (written)
  !   %o2 - handed unchanged to _sparcv9_AES_decrypt in %o5
  !         (presumably the key schedule - confirm against that routine)
  ! If both pointers are 4-byte aligned the block is moved with word
  ! loads and stores; otherwise control goes to .Lunaligned_dec, which
  ! moves the block one byte at a time.
  1022. .align 32
  1023. .globl AES_decrypt
  1024. AES_decrypt:
  ! Merge both pointers so a single test of the low two bits covers
  ! input and output alignment at once.
  1025. or %o0,%o1,%g1
  1026. andcc %g1,3,%g0
  1027. bnz,pn %xcc,.Lunaligned_dec
  ! Delay slot of a non-annulled branch: runs on both paths. It opens a
  ! new register window and stack frame, so from here on the incoming
  ! %o0-%o2 are addressed as %i0-%i2.
  1028. save %sp,-$frame,%sp
  ! Aligned path: fetch the block as four 32-bit words into %o0-%o3.
  1029. ld [%i0+0],%o0
  1030. ld [%i0+4],%o1
  1031. ld [%i0+8],%o2
  1032. ld [%i0+12],%o3
  ! Position-independent address of AES_Td (defined outside this
  ! excerpt): call .+8 continues after its delay slot and leaves the
  ! address of label 1 in %o7; the delay slot adds the assembly-time
  ! distance AES_Td-1b and places the sum in %o4.
  1033. 1: call .+8
  1034. add %o7,AES_Td-1b,%o4
  1035. call _sparcv9_AES_decrypt
  ! Delay slot: forward the third argument in %o5 before the callee runs.
  1036. mov %i2,%o5
  ! Write %o0-%o3, as left by _sparcv9_AES_decrypt, to the output as
  ! four 32-bit words.
  1037. st %o0,[%i1+0]
  1038. st %o1,[%i1+4]
  1039. st %o2,[%i1+8]
  1040. st %o3,[%i1+12]
  ! Return; the restore in the delay slot releases the window opened by
  ! the save above.
  1041. ret
  1042. restore
  1043. .align 32
  1044. .Lunaligned_dec:
  ! Unaligned path: read the 16 input bytes individually and pack each
  ! group of four into one 32-bit word, first byte in bits 31..24, using
  ! %l0-%l7 as scratch. Loads are interleaved with the shifts and ors
  ! that consume earlier bytes.
  1045. ldub [%i0+0],%l0
  1046. ldub [%i0+1],%l1
  1047. ldub [%i0+2],%l2
  1048. sll %l0,24,%l0
  1049. ldub [%i0+3],%l3
  1050. sll %l1,16,%l1
  1051. ldub [%i0+4],%l4
  1052. sll %l2,8,%l2
  1053. or %l1,%l0,%l0
  1054. ldub [%i0+5],%l5
  1055. sll %l4,24,%l4
  1056. or %l3,%l2,%l2
  1057. ldub [%i0+6],%l6
  1058. sll %l5,16,%l5
  ! %o0 = input bytes 0..3
  1059. or %l0,%l2,%o0
  1060. ldub [%i0+7],%l7
  1061. sll %l6,8,%l6
  1062. or %l5,%l4,%l4
  ! %l0-%l3 are free again and are reused for bytes 8..11.
  1063. ldub [%i0+8],%l0
  1064. or %l7,%l6,%l6
  1065. ldub [%i0+9],%l1
  ! %o1 = input bytes 4..7
  1066. or %l4,%l6,%o1
  1067. ldub [%i0+10],%l2
  1068. sll %l0,24,%l0
  1069. ldub [%i0+11],%l3
  1070. sll %l1,16,%l1
  1071. ldub [%i0+12],%l4
  1072. sll %l2,8,%l2
  1073. or %l1,%l0,%l0
  1074. ldub [%i0+13],%l5
  1075. sll %l4,24,%l4
  1076. or %l3,%l2,%l2
  1077. ldub [%i0+14],%l6
  1078. sll %l5,16,%l5
  ! %o2 = input bytes 8..11
  1079. or %l0,%l2,%o2
  1080. ldub [%i0+15],%l7
  1081. sll %l6,8,%l6
  1082. or %l5,%l4,%l4
  1083. or %l7,%l6,%l6
  ! %o3 = input bytes 12..15
  1084. or %l4,%l6,%o3
  ! Same call sequence as the aligned path: AES_Td address in %o4, third
  ! argument in %o5, then _sparcv9_AES_decrypt.
  1085. 1: call .+8
  1086. add %o7,AES_Td-1b,%o4
  1087. call _sparcv9_AES_decrypt
  1088. mov %i2,%o5
  ! Scatter %o0-%o3 to the output one byte at a time, bits 31..24 of
  ! each word first. stb writes only the low 8 bits of its source, so
  ! the upper three bytes are shifted down into scratch registers while
  ! the last byte of each word is stored straight from %o0-%o3.
  1089. srl %o0,24,%l0
  1090. srl %o0,16,%l1
  1091. stb %l0,[%i1+0]
  1092. srl %o0,8,%l2
  1093. stb %l1,[%i1+1]
  1094. stb %l2,[%i1+2]
  1095. srl %o1,24,%l4
  1096. stb %o0,[%i1+3]
  1097. srl %o1,16,%l5
  1098. stb %l4,[%i1+4]
  1099. srl %o1,8,%l6
  1100. stb %l5,[%i1+5]
  1101. stb %l6,[%i1+6]
  1102. srl %o2,24,%l0
  1103. stb %o1,[%i1+7]
  1104. srl %o2,16,%l1
  1105. stb %l0,[%i1+8]
  1106. srl %o2,8,%l2
  1107. stb %l1,[%i1+9]
  1108. stb %l2,[%i1+10]
  1109. srl %o3,24,%l4
  1110. stb %o2,[%i1+11]
  1111. srl %o3,16,%l5
  1112. stb %l4,[%i1+12]
  1113. srl %o3,8,%l6
  1114. stb %l5,[%i1+13]
  1115. stb %l6,[%i1+14]
  1116. stb %o3,[%i1+15]
  ! Return; the restore in the delay slot releases the register window.
  1117. ret
  1118. restore
  1119. .type AES_decrypt,#function
  1120. .size AES_decrypt,(.-AES_decrypt)
  1121. ___
  1122. # fmovs instructions, serving as FP nops, were originally added to
  1123. # meet specific instruction alignment requirements and maximize ILP.
  1124. # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can
  1125. # have an undesired effect, so just strip them from the generated
  1126. # code and sacrifice a fraction of a percent in performance...
  # The substitution deletes everything from each "fmovs" to the end of
  # its line: /m lets $ match at every line end, /g applies it to all
  # occurrences. The now-empty lines stay in the output.
  1127. $code =~ s/fmovs.*$//gm;
  # Emit the filtered assembly text.
  1128. print $code;
  1129. close STDOUT or die "error closing STDOUT: $!"; # ensure flush