dest4-sparcv9.pl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. #!/usr/bin/env perl
  2. # ====================================================================
  3. # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
  4. # <appro@openssl.org>. The module is licensed under 2-clause BSD
  5. # license. March 2013. All rights reserved.
  6. # ====================================================================
  7. ######################################################################
  8. # DES for SPARC T4.
  9. #
  10. # As with other hardware-assisted ciphers, CBC encrypt results [for
  11. # aligned data] are virtually identical to critical path lengths:
  12. #
  13. #               DES             Triple-DES
  14. # CBC encrypt   4.14/4.15(*)    11.7/11.7
  15. # CBC decrypt   1.77/4.11(**)   6.42/7.47
  16. #
  17. # (*)  numbers after the slash are for
  18. #      misaligned data;
  19. # (**) this is the result for the largest
  20. #      block size; unlike all other
  21. #      cases, results for smaller blocks
  22. #      are better[?];
  # Capture the directory part of the script path (everything up to and
  # including the last '/' or '\') so that helper modules can be found
  # relative to this script.
  23. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  # Search the script's own directory and ../../perlasm relative to it.
  24. push(@INC,"${dir}","${dir}../../perlasm");
  25. require "sparcv9_modes.pl";
  # asm_init is not defined in this file; presumably it is provided by
  # sparcv9_modes.pl and sets $::abibits / $::size_t_cc from the command
  # line -- TODO confirm.
  26. &asm_init(@ARGV);
  # Only when $::abibits is 64: emit .register directives marking %g2 and
  # %g3 as scratch.
  27. $code.=<<___ if ($::abibits==64);
  28. .register %g2,#scratch
  29. .register %g3,#scratch
  30. ___
  # Everything generated below goes into the .text section.
  31. $code.=<<___;
  32. .text
  33. ___
  # des_t4_key_expand: $inp is %o0 and $out is %o1.
  # The emitted code loads one 64-bit doubleword from $inp (two doublewords
  # merged with alignaddr/faligndata when $inp is not 8-byte aligned), runs
  # it through a chain of des_kexpand instructions and stores 16 doublewords
  # to $out at offsets 0x00..0x78. The stores are interleaved with the
  # des_kexpand instructions, so they are not issued in address order.
  # The final std sits in the delay slot of retl.
  34. { my ($inp,$out)=("%o0","%o1");
  35. $code.=<<___;
  36. .align 32
  37. .globl des_t4_key_expand
  38. .type des_t4_key_expand,#function
  39. des_t4_key_expand:
  40. andcc $inp, 0x7, %g0
  41. alignaddr $inp, %g0, $inp
  42. bz,pt %icc, 1f
  43. ldd [$inp + 0x00], %f0
  44. ldd [$inp + 0x08], %f2
  45. faligndata %f0, %f2, %f0
  46. 1: des_kexpand %f0, 0, %f0
  47. des_kexpand %f0, 1, %f2
  48. std %f0, [$out + 0x00]
  49. des_kexpand %f2, 3, %f6
  50. std %f2, [$out + 0x08]
  51. des_kexpand %f2, 2, %f4
  52. des_kexpand %f6, 3, %f10
  53. std %f6, [$out + 0x18]
  54. des_kexpand %f6, 2, %f8
  55. std %f4, [$out + 0x10]
  56. des_kexpand %f10, 3, %f14
  57. std %f10, [$out + 0x28]
  58. des_kexpand %f10, 2, %f12
  59. std %f8, [$out + 0x20]
  60. des_kexpand %f14, 1, %f16
  61. std %f14, [$out + 0x38]
  62. des_kexpand %f16, 3, %f20
  63. std %f12, [$out + 0x30]
  64. des_kexpand %f16, 2, %f18
  65. std %f16, [$out + 0x40]
  66. des_kexpand %f20, 3, %f24
  67. std %f20, [$out + 0x50]
  68. des_kexpand %f20, 2, %f22
  69. std %f18, [$out + 0x48]
  70. des_kexpand %f24, 3, %f28
  71. std %f24, [$out + 0x60]
  72. des_kexpand %f24, 2, %f26
  73. std %f22, [$out + 0x58]
  74. des_kexpand %f28, 1, %f30
  75. std %f28, [$out + 0x70]
  76. std %f26, [$out + 0x68]
  77. retl
  78. std %f30, [$out + 0x78]
  79. .size des_t4_key_expand,.-des_t4_key_expand
  80. ___
  81. }
  # Register naming shared by all CBC routines in this scope:
  # $inp/$out/$len/$key/$ivec are %o0..%o4 (presumably the five C arguments
  # in that order -- confirm against the C prototype) and
  # $ileft/$iright/$omask are %g1..%g3.
  82. { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  83. my ($ileft,$iright,$omask) = map("%g$_",(1..3));
  # Single-DES CBC: des_t4_cbc_encrypt and des_t4_cbc_decrypt.
  # What the emitted code does, as far as this heredoc shows:
  #  - a zero $len returns at once through .Lcbc_abort (the label is also
  #    the early-exit target of the EDE3 routines later in this file);
  #  - $len is shifted right by 3, so the loop counts whole 8-byte blocks;
  #  - the IV is read from [$ivec] as two 32-bit words and written back
  #    the same way before returning;
  #  - $ileft/$iright are the bit shifts used to merge two doublewords
  #    when $inp is not 8-byte aligned;
  #  - a non-zero $omask means $out is not 8-byte aligned; the result is
  #    then written with two partial stores at local label 2;
  #  - the "+4" in the branches back to the loop labels skips the first
  #    ldx of the loop, because the ldxa at label 2 already loaded %g4;
  #  - encrypt loads the 16 key-schedule doublewords in ascending order
  #    (0x00..0x78), decrypt loads them in descending order (0x78..0x00);
  #  - decrypt keeps the current ciphertext block in %f2 (movxtod %g4, %f2)
  #    to serve as the IV for the next block.
  84. $code.=<<___;
  85. .globl des_t4_cbc_encrypt
  86. .align 32
  87. des_t4_cbc_encrypt:
  88. cmp $len, 0
  89. be,pn $::size_t_cc, .Lcbc_abort
  90. nop
  91. ld [$ivec + 0], %f0 ! load ivec
  92. ld [$ivec + 4], %f1
  93. and $inp, 7, $ileft
  94. andn $inp, 7, $inp
  95. sll $ileft, 3, $ileft
  96. mov 0xff, $omask
  97. prefetch [$inp], 20
  98. prefetch [$inp + 63], 20
  99. sub %g0, $ileft, $iright
  100. and $out, 7, %g4
  101. alignaddrl $out, %g0, $out
  102. srl $omask, %g4, $omask
  103. srlx $len, 3, $len
  104. movrz %g4, 0, $omask
  105. prefetch [$out], 22
  106. ldd [$key + 0x00], %f4 ! load key schedule
  107. ldd [$key + 0x08], %f6
  108. ldd [$key + 0x10], %f8
  109. ldd [$key + 0x18], %f10
  110. ldd [$key + 0x20], %f12
  111. ldd [$key + 0x28], %f14
  112. ldd [$key + 0x30], %f16
  113. ldd [$key + 0x38], %f18
  114. ldd [$key + 0x40], %f20
  115. ldd [$key + 0x48], %f22
  116. ldd [$key + 0x50], %f24
  117. ldd [$key + 0x58], %f26
  118. ldd [$key + 0x60], %f28
  119. ldd [$key + 0x68], %f30
  120. ldd [$key + 0x70], %f32
  121. ldd [$key + 0x78], %f34
  122. .Ldes_cbc_enc_loop:
  123. ldx [$inp + 0], %g4
  124. brz,pt $ileft, 4f
  125. nop
  126. ldx [$inp + 8], %g5
  127. sllx %g4, $ileft, %g4
  128. srlx %g5, $iright, %g5
  129. or %g5, %g4, %g4
  130. 4:
  131. movxtod %g4, %f2
  132. prefetch [$inp + 8+63], 20
  133. add $inp, 8, $inp
  134. fxor %f2, %f0, %f0 ! ^= ivec
  135. prefetch [$out + 63], 22
  136. des_ip %f0, %f0
  137. des_round %f4, %f6, %f0, %f0
  138. des_round %f8, %f10, %f0, %f0
  139. des_round %f12, %f14, %f0, %f0
  140. des_round %f16, %f18, %f0, %f0
  141. des_round %f20, %f22, %f0, %f0
  142. des_round %f24, %f26, %f0, %f0
  143. des_round %f28, %f30, %f0, %f0
  144. des_round %f32, %f34, %f0, %f0
  145. des_iip %f0, %f0
  146. brnz,pn $omask, 2f
  147. sub $len, 1, $len
  148. std %f0, [$out + 0]
  149. brnz,pt $len, .Ldes_cbc_enc_loop
  150. add $out, 8, $out
  151. st %f0, [$ivec + 0] ! write out ivec
  152. retl
  153. st %f1, [$ivec + 4]
  154. .Lcbc_abort:
  155. retl
  156. nop
  157. .align 16
  158. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  159. ! and ~4x deterioration
  160. ! in inp==out case
  161. faligndata %f0, %f0, %f2 ! handle unaligned output
  162. stda %f2, [$out + $omask]0xc0 ! partial store
  163. add $out, 8, $out
  164. orn %g0, $omask, $omask
  165. stda %f2, [$out + $omask]0xc0 ! partial store
  166. brnz,pt $len, .Ldes_cbc_enc_loop+4
  167. orn %g0, $omask, $omask
  168. st %f0, [$ivec + 0] ! write out ivec
  169. retl
  170. st %f1, [$ivec + 4]
  171. .type des_t4_cbc_encrypt,#function
  172. .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
  173. .globl des_t4_cbc_decrypt
  174. .align 32
  175. des_t4_cbc_decrypt:
  176. cmp $len, 0
  177. be,pn $::size_t_cc, .Lcbc_abort
  178. nop
  179. ld [$ivec + 0], %f2 ! load ivec
  180. ld [$ivec + 4], %f3
  181. and $inp, 7, $ileft
  182. andn $inp, 7, $inp
  183. sll $ileft, 3, $ileft
  184. mov 0xff, $omask
  185. prefetch [$inp], 20
  186. prefetch [$inp + 63], 20
  187. sub %g0, $ileft, $iright
  188. and $out, 7, %g4
  189. alignaddrl $out, %g0, $out
  190. srl $omask, %g4, $omask
  191. srlx $len, 3, $len
  192. movrz %g4, 0, $omask
  193. prefetch [$out], 22
  194. ldd [$key + 0x78], %f4 ! load key schedule
  195. ldd [$key + 0x70], %f6
  196. ldd [$key + 0x68], %f8
  197. ldd [$key + 0x60], %f10
  198. ldd [$key + 0x58], %f12
  199. ldd [$key + 0x50], %f14
  200. ldd [$key + 0x48], %f16
  201. ldd [$key + 0x40], %f18
  202. ldd [$key + 0x38], %f20
  203. ldd [$key + 0x30], %f22
  204. ldd [$key + 0x28], %f24
  205. ldd [$key + 0x20], %f26
  206. ldd [$key + 0x18], %f28
  207. ldd [$key + 0x10], %f30
  208. ldd [$key + 0x08], %f32
  209. ldd [$key + 0x00], %f34
  210. .Ldes_cbc_dec_loop:
  211. ldx [$inp + 0], %g4
  212. brz,pt $ileft, 4f
  213. nop
  214. ldx [$inp + 8], %g5
  215. sllx %g4, $ileft, %g4
  216. srlx %g5, $iright, %g5
  217. or %g5, %g4, %g4
  218. 4:
  219. movxtod %g4, %f0
  220. prefetch [$inp + 8+63], 20
  221. add $inp, 8, $inp
  222. prefetch [$out + 63], 22
  223. des_ip %f0, %f0
  224. des_round %f4, %f6, %f0, %f0
  225. des_round %f8, %f10, %f0, %f0
  226. des_round %f12, %f14, %f0, %f0
  227. des_round %f16, %f18, %f0, %f0
  228. des_round %f20, %f22, %f0, %f0
  229. des_round %f24, %f26, %f0, %f0
  230. des_round %f28, %f30, %f0, %f0
  231. des_round %f32, %f34, %f0, %f0
  232. des_iip %f0, %f0
  233. fxor %f2, %f0, %f0 ! ^= ivec
  234. movxtod %g4, %f2
  235. brnz,pn $omask, 2f
  236. sub $len, 1, $len
  237. std %f0, [$out + 0]
  238. brnz,pt $len, .Ldes_cbc_dec_loop
  239. add $out, 8, $out
  240. st %f2, [$ivec + 0] ! write out ivec
  241. retl
  242. st %f3, [$ivec + 4]
  243. .align 16
  244. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  245. ! and ~4x deterioration
  246. ! in inp==out case
  247. faligndata %f0, %f0, %f0 ! handle unaligned output
  248. stda %f0, [$out + $omask]0xc0 ! partial store
  249. add $out, 8, $out
  250. orn %g0, $omask, $omask
  251. stda %f0, [$out + $omask]0xc0 ! partial store
  252. brnz,pt $len, .Ldes_cbc_dec_loop+4
  253. orn %g0, $omask, $omask
  254. st %f2, [$ivec + 0] ! write out ivec
  255. retl
  256. st %f3, [$ivec + 4]
  257. .type des_t4_cbc_decrypt,#function
  258. .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
  259. ___
  260. # One might wonder why there are back-to-back des_iip/des_ip
  261. # pairs between EDE passes. Indeed, aren't they inverses of each other?
  262. # They almost are. The outcome of the pair is the 32-bit words being
  263. # swapped in the target register. Consider the des_iip/des_ip pair as a
  264. # way to perform the due swap; it's actually the fastest way in this case.
  # Triple-DES (EDE3) CBC: des_t4_ede3_cbc_encrypt and
  # des_t4_ede3_cbc_decrypt. Register naming, the zero-length exit through
  # .Lcbc_abort, the IV load/store and the misaligned input/output handling
  # follow the same pattern as the single-DES routines earlier in the file.
  # Key schedule usage visible in the loads below:
  #  - encrypt: pass 1 reads $key+0x00..0x78 ascending (loaded once, before
  #    the loop), pass 2 reads $key+0xf8 down to $key+0x80, pass 3 reads
  #    $key+0x100..0x178 ascending;
  #  - decrypt: pass 1 reads $key+0x178 down to $key+0x100 (loaded once,
  #    before the loop), pass 2 reads $key+0x80..0xf8 ascending, pass 3
  #    reads $key+0x78 down to $key+0x00.
  # The round keys for passes 2 and 3 go through %f36-%f62 and are reloaded
  # on every loop iteration, interleaved with the des_round instructions.
  265. $code.=<<___;
  266. .globl des_t4_ede3_cbc_encrypt
  267. .align 32
  268. des_t4_ede3_cbc_encrypt:
  269. cmp $len, 0
  270. be,pn $::size_t_cc, .Lcbc_abort
  271. nop
  272. ld [$ivec + 0], %f0 ! load ivec
  273. ld [$ivec + 4], %f1
  274. and $inp, 7, $ileft
  275. andn $inp, 7, $inp
  276. sll $ileft, 3, $ileft
  277. mov 0xff, $omask
  278. prefetch [$inp], 20
  279. prefetch [$inp + 63], 20
  280. sub %g0, $ileft, $iright
  281. and $out, 7, %g4
  282. alignaddrl $out, %g0, $out
  283. srl $omask, %g4, $omask
  284. srlx $len, 3, $len
  285. movrz %g4, 0, $omask
  286. prefetch [$out], 22
  287. ldd [$key + 0x00], %f4 ! load key schedule
  288. ldd [$key + 0x08], %f6
  289. ldd [$key + 0x10], %f8
  290. ldd [$key + 0x18], %f10
  291. ldd [$key + 0x20], %f12
  292. ldd [$key + 0x28], %f14
  293. ldd [$key + 0x30], %f16
  294. ldd [$key + 0x38], %f18
  295. ldd [$key + 0x40], %f20
  296. ldd [$key + 0x48], %f22
  297. ldd [$key + 0x50], %f24
  298. ldd [$key + 0x58], %f26
  299. ldd [$key + 0x60], %f28
  300. ldd [$key + 0x68], %f30
  301. ldd [$key + 0x70], %f32
  302. ldd [$key + 0x78], %f34
  303. .Ldes_ede3_cbc_enc_loop:
  304. ldx [$inp + 0], %g4
  305. brz,pt $ileft, 4f
  306. nop
  307. ldx [$inp + 8], %g5
  308. sllx %g4, $ileft, %g4
  309. srlx %g5, $iright, %g5
  310. or %g5, %g4, %g4
  311. 4:
  312. movxtod %g4, %f2
  313. prefetch [$inp + 8+63], 20
  314. add $inp, 8, $inp
  315. fxor %f2, %f0, %f0 ! ^= ivec
  316. prefetch [$out + 63], 22
  317. des_ip %f0, %f0
  318. des_round %f4, %f6, %f0, %f0
  319. des_round %f8, %f10, %f0, %f0
  320. des_round %f12, %f14, %f0, %f0
  321. des_round %f16, %f18, %f0, %f0
  322. ldd [$key + 0x100-0x08], %f36
  323. ldd [$key + 0x100-0x10], %f38
  324. des_round %f20, %f22, %f0, %f0
  325. ldd [$key + 0x100-0x18], %f40
  326. ldd [$key + 0x100-0x20], %f42
  327. des_round %f24, %f26, %f0, %f0
  328. ldd [$key + 0x100-0x28], %f44
  329. ldd [$key + 0x100-0x30], %f46
  330. des_round %f28, %f30, %f0, %f0
  331. ldd [$key + 0x100-0x38], %f48
  332. ldd [$key + 0x100-0x40], %f50
  333. des_round %f32, %f34, %f0, %f0
  334. ldd [$key + 0x100-0x48], %f52
  335. ldd [$key + 0x100-0x50], %f54
  336. des_iip %f0, %f0
  337. ldd [$key + 0x100-0x58], %f56
  338. ldd [$key + 0x100-0x60], %f58
  339. des_ip %f0, %f0
  340. ldd [$key + 0x100-0x68], %f60
  341. ldd [$key + 0x100-0x70], %f62
  342. des_round %f36, %f38, %f0, %f0
  343. ldd [$key + 0x100-0x78], %f36
  344. ldd [$key + 0x100-0x80], %f38
  345. des_round %f40, %f42, %f0, %f0
  346. des_round %f44, %f46, %f0, %f0
  347. des_round %f48, %f50, %f0, %f0
  348. ldd [$key + 0x100+0x00], %f40
  349. ldd [$key + 0x100+0x08], %f42
  350. des_round %f52, %f54, %f0, %f0
  351. ldd [$key + 0x100+0x10], %f44
  352. ldd [$key + 0x100+0x18], %f46
  353. des_round %f56, %f58, %f0, %f0
  354. ldd [$key + 0x100+0x20], %f48
  355. ldd [$key + 0x100+0x28], %f50
  356. des_round %f60, %f62, %f0, %f0
  357. ldd [$key + 0x100+0x30], %f52
  358. ldd [$key + 0x100+0x38], %f54
  359. des_round %f36, %f38, %f0, %f0
  360. ldd [$key + 0x100+0x40], %f56
  361. ldd [$key + 0x100+0x48], %f58
  362. des_iip %f0, %f0
  363. ldd [$key + 0x100+0x50], %f60
  364. ldd [$key + 0x100+0x58], %f62
  365. des_ip %f0, %f0
  366. ldd [$key + 0x100+0x60], %f36
  367. ldd [$key + 0x100+0x68], %f38
  368. des_round %f40, %f42, %f0, %f0
  369. ldd [$key + 0x100+0x70], %f40
  370. ldd [$key + 0x100+0x78], %f42
  371. des_round %f44, %f46, %f0, %f0
  372. des_round %f48, %f50, %f0, %f0
  373. des_round %f52, %f54, %f0, %f0
  374. des_round %f56, %f58, %f0, %f0
  375. des_round %f60, %f62, %f0, %f0
  376. des_round %f36, %f38, %f0, %f0
  377. des_round %f40, %f42, %f0, %f0
  378. des_iip %f0, %f0
  379. brnz,pn $omask, 2f
  380. sub $len, 1, $len
  381. std %f0, [$out + 0]
  382. brnz,pt $len, .Ldes_ede3_cbc_enc_loop
  383. add $out, 8, $out
  384. st %f0, [$ivec + 0] ! write out ivec
  385. retl
  386. st %f1, [$ivec + 4]
  387. .align 16
  388. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  389. ! and ~2x deterioration
  390. ! in inp==out case
  391. faligndata %f0, %f0, %f2 ! handle unaligned output
  392. stda %f2, [$out + $omask]0xc0 ! partial store
  393. add $out, 8, $out
  394. orn %g0, $omask, $omask
  395. stda %f2, [$out + $omask]0xc0 ! partial store
  396. brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
  397. orn %g0, $omask, $omask
  398. st %f0, [$ivec + 0] ! write out ivec
  399. retl
  400. st %f1, [$ivec + 4]
  401. .type des_t4_ede3_cbc_encrypt,#function
  402. .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
  403. .globl des_t4_ede3_cbc_decrypt
  404. .align 32
  405. des_t4_ede3_cbc_decrypt:
  406. cmp $len, 0
  407. be,pn $::size_t_cc, .Lcbc_abort
  408. nop
  409. ld [$ivec + 0], %f2 ! load ivec
  410. ld [$ivec + 4], %f3
  411. and $inp, 7, $ileft
  412. andn $inp, 7, $inp
  413. sll $ileft, 3, $ileft
  414. mov 0xff, $omask
  415. prefetch [$inp], 20
  416. prefetch [$inp + 63], 20
  417. sub %g0, $ileft, $iright
  418. and $out, 7, %g4
  419. alignaddrl $out, %g0, $out
  420. srl $omask, %g4, $omask
  421. srlx $len, 3, $len
  422. movrz %g4, 0, $omask
  423. prefetch [$out], 22
  424. ldd [$key + 0x100+0x78], %f4 ! load key schedule
  425. ldd [$key + 0x100+0x70], %f6
  426. ldd [$key + 0x100+0x68], %f8
  427. ldd [$key + 0x100+0x60], %f10
  428. ldd [$key + 0x100+0x58], %f12
  429. ldd [$key + 0x100+0x50], %f14
  430. ldd [$key + 0x100+0x48], %f16
  431. ldd [$key + 0x100+0x40], %f18
  432. ldd [$key + 0x100+0x38], %f20
  433. ldd [$key + 0x100+0x30], %f22
  434. ldd [$key + 0x100+0x28], %f24
  435. ldd [$key + 0x100+0x20], %f26
  436. ldd [$key + 0x100+0x18], %f28
  437. ldd [$key + 0x100+0x10], %f30
  438. ldd [$key + 0x100+0x08], %f32
  439. ldd [$key + 0x100+0x00], %f34
  440. .Ldes_ede3_cbc_dec_loop:
  441. ldx [$inp + 0], %g4
  442. brz,pt $ileft, 4f
  443. nop
  444. ldx [$inp + 8], %g5
  445. sllx %g4, $ileft, %g4
  446. srlx %g5, $iright, %g5
  447. or %g5, %g4, %g4
  448. 4:
  449. movxtod %g4, %f0
  450. prefetch [$inp + 8+63], 20
  451. add $inp, 8, $inp
  452. prefetch [$out + 63], 22
  453. des_ip %f0, %f0
  454. des_round %f4, %f6, %f0, %f0
  455. des_round %f8, %f10, %f0, %f0
  456. des_round %f12, %f14, %f0, %f0
  457. des_round %f16, %f18, %f0, %f0
  458. ldd [$key + 0x80+0x00], %f36
  459. ldd [$key + 0x80+0x08], %f38
  460. des_round %f20, %f22, %f0, %f0
  461. ldd [$key + 0x80+0x10], %f40
  462. ldd [$key + 0x80+0x18], %f42
  463. des_round %f24, %f26, %f0, %f0
  464. ldd [$key + 0x80+0x20], %f44
  465. ldd [$key + 0x80+0x28], %f46
  466. des_round %f28, %f30, %f0, %f0
  467. ldd [$key + 0x80+0x30], %f48
  468. ldd [$key + 0x80+0x38], %f50
  469. des_round %f32, %f34, %f0, %f0
  470. ldd [$key + 0x80+0x40], %f52
  471. ldd [$key + 0x80+0x48], %f54
  472. des_iip %f0, %f0
  473. ldd [$key + 0x80+0x50], %f56
  474. ldd [$key + 0x80+0x58], %f58
  475. des_ip %f0, %f0
  476. ldd [$key + 0x80+0x60], %f60
  477. ldd [$key + 0x80+0x68], %f62
  478. des_round %f36, %f38, %f0, %f0
  479. ldd [$key + 0x80+0x70], %f36
  480. ldd [$key + 0x80+0x78], %f38
  481. des_round %f40, %f42, %f0, %f0
  482. des_round %f44, %f46, %f0, %f0
  483. des_round %f48, %f50, %f0, %f0
  484. ldd [$key + 0x80-0x08], %f40
  485. ldd [$key + 0x80-0x10], %f42
  486. des_round %f52, %f54, %f0, %f0
  487. ldd [$key + 0x80-0x18], %f44
  488. ldd [$key + 0x80-0x20], %f46
  489. des_round %f56, %f58, %f0, %f0
  490. ldd [$key + 0x80-0x28], %f48
  491. ldd [$key + 0x80-0x30], %f50
  492. des_round %f60, %f62, %f0, %f0
  493. ldd [$key + 0x80-0x38], %f52
  494. ldd [$key + 0x80-0x40], %f54
  495. des_round %f36, %f38, %f0, %f0
  496. ldd [$key + 0x80-0x48], %f56
  497. ldd [$key + 0x80-0x50], %f58
  498. des_iip %f0, %f0
  499. ldd [$key + 0x80-0x58], %f60
  500. ldd [$key + 0x80-0x60], %f62
  501. des_ip %f0, %f0
  502. ldd [$key + 0x80-0x68], %f36
  503. ldd [$key + 0x80-0x70], %f38
  504. des_round %f40, %f42, %f0, %f0
  505. ldd [$key + 0x80-0x78], %f40
  506. ldd [$key + 0x80-0x80], %f42
  507. des_round %f44, %f46, %f0, %f0
  508. des_round %f48, %f50, %f0, %f0
  509. des_round %f52, %f54, %f0, %f0
  510. des_round %f56, %f58, %f0, %f0
  511. des_round %f60, %f62, %f0, %f0
  512. des_round %f36, %f38, %f0, %f0
  513. des_round %f40, %f42, %f0, %f0
  514. des_iip %f0, %f0
  515. fxor %f2, %f0, %f0 ! ^= ivec
  516. movxtod %g4, %f2
  517. brnz,pn $omask, 2f
  518. sub $len, 1, $len
  519. std %f0, [$out + 0]
  520. brnz,pt $len, .Ldes_ede3_cbc_dec_loop
  521. add $out, 8, $out
  522. st %f2, [$ivec + 0] ! write out ivec
  523. retl
  524. st %f3, [$ivec + 4]
  525. .align 16
  526. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  527. ! and ~3x deterioration
  528. ! in inp==out case
  529. faligndata %f0, %f0, %f0 ! handle unaligned output
  530. stda %f0, [$out + $omask]0xc0 ! partial store
  531. add $out, 8, $out
  532. orn %g0, $omask, $omask
  533. stda %f0, [$out + $omask]0xc0 ! partial store
  534. brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
  535. orn %g0, $omask, $omask
  536. st %f2, [$ivec + 0] ! write out ivec
  537. retl
  538. st %f3, [$ivec + 4]
  539. .type des_t4_ede3_cbc_decrypt,#function
  540. .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
  541. ___
  542. }
  # Append the identification string and final alignment directive to the
  # generated assembly.
  543. $code.=<<___;
  544. .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
  545. .align 4
  546. ___
  # emit_assembler() is not defined in this file; presumably it comes from
  # sparcv9_modes.pl and prints the accumulated $code -- TODO confirm.
  547. &emit_assembler();
  # A failed close means buffered output could not be flushed (full disk,
  # broken pipe, ...). Die so the script exits non-zero instead of leaving
  # the build to continue with a truncated assembly file.
  548. close STDOUT or die "error closing STDOUT: $!";