2
0

dest4-sparcv9.pl 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. #! /usr/bin/env perl
  2. # Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by David S. Miller and Andy Polyakov.
  10. # The module is licensed under 2-clause BSD
  11. # license. March 2013. All rights reserved.
  12. # ====================================================================
  13. ######################################################################
  14. # DES for SPARC T4.
  15. #
  16. # As with other hardware-assisted ciphers CBC encrypt results [for
  17. # aligned data] are virtually identical to critical path lengths:
  18. #
  19. #                 DES             Triple-DES
  20. # CBC encrypt     4.14/4.15(*)    11.7/11.7
  21. # CBC decrypt     1.77/4.11(**)   6.42/7.47
  22. #
  23. # (*)  numbers after the slash are for
  24. #      misaligned data;
  25. # (**) this is the result for the largest
  26. #      block size; unlike all other
  27. #      cases, smaller blocks give
  28. #      better results[?];
  # Work out the directory part of the path this script was invoked with
  # (everything up to and including the last '/' or '\') so the shared
  # perlasm helper can be located regardless of the current directory.
  # When $0 carries no directory part the match fails; $dir is then set
  # to the empty string explicitly instead of relying on whatever $1
  # happened to hold, which makes the search paths relative to the
  # current directory.
  $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
  push(@INC, "${dir}", "${dir}../../perlasm");

  # sparcv9_modes.pl is not part of this file.  This script later calls
  # emit_assembler() and interpolates $::size_t_cc without defining
  # either, so presumably both come from that module -- TODO confirm.
  require "sparcv9_modes.pl";

  # The last command-line argument, when present and true, names the file
  # the generated assembly is written to; without it the output stays on
  # the inherited STDOUT.  The three-argument form of open keeps special
  # characters in the file name from being interpreted as an open mode,
  # and a failure to create the file aborts the run instead of letting
  # the output silently go to the original STDOUT.
  $output = pop;
  if ($output) {
      open(STDOUT, '>', $output)
          or die "can't open $output for writing: $!";
  }
  # Assembly prologue appended to $code: includes sparc_arch.h, declares
  # %g2 and %g3 as scratch registers when __arch64__ is defined, and
  # switches to the .text section.  Everything between the heredoc
  # markers is emitted text, not Perl.
  33. $code.=<<___;
  34. #include "sparc_arch.h"
  35. #ifdef __arch64__
  36. .register %g2,#scratch
  37. .register %g3,#scratch
  38. #endif
  39. .text
  40. ___
  # des_t4_key_expand: emits the routine that expands one 64-bit value
  # into a 16-doubleword schedule.
  #
  #   $inp (%o0)  address of the 8-byte input (presumably the DES key --
  #               TODO confirm against the C caller); it may be
  #               misaligned: the low three address bits are tested, and
  #               when they are non-zero a second doubleword is loaded
  #               and the two are merged with alignaddr/faligndata.
  #   $out (%o1)  address receiving the schedule; sixteen doublewords are
  #               stored at offsets 0x00 through 0x78 (128 bytes).  The
  #               plain std stores imply $out must be 8-byte aligned --
  #               NOTE(review): confirm callers guarantee this.
  #
  # Each des_kexpand result feeds the following ones, and the stores are
  # interleaved with the expansions rather than issued in offset order
  # (e.g. %f6 goes to 0x18 before %f4 goes to 0x10).  The instruction
  # written after "bz,pt" and after "retl" sits in the SPARC branch delay
  # slot, so the first ldd and the final std are part of the normal flow.
  # Do not reorder lines inside the heredoc.
  41. { my ($inp,$out)=("%o0","%o1");
  42. $code.=<<___;
  43. .align 32
  44. .globl des_t4_key_expand
  45. .type des_t4_key_expand,#function
  46. des_t4_key_expand:
  47. andcc $inp, 0x7, %g0
  48. alignaddr $inp, %g0, $inp
  49. bz,pt %icc, 1f
  50. ldd [$inp + 0x00], %f0
  51. ldd [$inp + 0x08], %f2
  52. faligndata %f0, %f2, %f0
  53. 1: des_kexpand %f0, 0, %f0
  54. des_kexpand %f0, 1, %f2
  55. std %f0, [$out + 0x00]
  56. des_kexpand %f2, 3, %f6
  57. std %f2, [$out + 0x08]
  58. des_kexpand %f2, 2, %f4
  59. des_kexpand %f6, 3, %f10
  60. std %f6, [$out + 0x18]
  61. des_kexpand %f6, 2, %f8
  62. std %f4, [$out + 0x10]
  63. des_kexpand %f10, 3, %f14
  64. std %f10, [$out + 0x28]
  65. des_kexpand %f10, 2, %f12
  66. std %f8, [$out + 0x20]
  67. des_kexpand %f14, 1, %f16
  68. std %f14, [$out + 0x38]
  69. des_kexpand %f16, 3, %f20
  70. std %f12, [$out + 0x30]
  71. des_kexpand %f16, 2, %f18
  72. std %f16, [$out + 0x40]
  73. des_kexpand %f20, 3, %f24
  74. std %f20, [$out + 0x50]
  75. des_kexpand %f20, 2, %f22
  76. std %f18, [$out + 0x48]
  77. des_kexpand %f24, 3, %f28
  78. std %f24, [$out + 0x60]
  79. des_kexpand %f24, 2, %f26
  80. std %f22, [$out + 0x58]
  81. des_kexpand %f28, 1, %f30
  82. std %f28, [$out + 0x70]
  83. std %f26, [$out + 0x68]
  84. retl
  85. std %f30, [$out + 0x78]
  86. .size des_t4_key_expand,.-des_t4_key_expand
  87. ___
  88. }
  # CBC-mode routines.  This block emits four functions that share one
  # register assignment and one overall shape:
  #
  #   des_t4_cbc_encrypt        des_t4_cbc_decrypt
  #   des_t4_ede3_cbc_encrypt   des_t4_ede3_cbc_decrypt
  #
  # Register assignment (interpolated into the heredocs below):
  #   $inp  (%o0)  input address, may be misaligned
  #   $out  (%o1)  output address, may be misaligned
  #   $len  (%o2)  length in bytes; shifted right by 3 to obtain the
  #                number of 8-byte blocks, so a trailing partial block
  #                is not processed
  #   $key  (%o3)  key schedule address (offsets 0x00..0x78 for the
  #                single-DES routines, 0x00..0x178 for the EDE3 ones)
  #   $ivec (%o4)  address of the 8-byte IV; read on entry and written
  #                back before returning
  #   $ileft/$iright (%g1/%g2)  left/right shift counts used to assemble
  #                one input block from two aligned doublewords when $inp
  #                is misaligned ($ileft is the byte offset times 8)
  #   $omask (%g3) byte mask for the partial stores used when $out is
  #                misaligned; zero when $out is 8-byte aligned
  #
  # Common flow: a zero $len branches to .Lcbc_abort and returns at once.
  # Otherwise the IV and the key schedule are loaded into floating-point
  # registers, and each loop iteration loads one block, runs des_ip, the
  # des_round sequence and des_iip, then stores the result either with a
  # plain std (aligned output) or through the code at local label "2:"
  # (misaligned output: faligndata plus two partial stores).  That path
  # also preloads the next input doubleword into %g4 and re-enters the
  # loop at "<loop label>+4", i.e. one instruction past the loop's
  # leading ldx, which would otherwise load the same doubleword.
  #
  # Encrypt XORs the IV/previous output into the input before the rounds
  # and writes the last output block (%f0/%f1) back to $ivec.  Decrypt
  # loads the key schedule in reverse order, XORs after the rounds, keeps
  # the current input block in %f2 for the next iteration, and writes it
  # (%f2/%f3) back to $ivec on exit.
  #
  # $::size_t_cc is not defined in this file; presumably it is provided
  # by sparcv9_modes.pl and selects the condition-code register matching
  # the width of size_t -- TODO confirm.
  #
  # The instruction order inside the heredocs matters (branch delay
  # slots, the label+4 re-entry, loads interleaved with rounds); do not
  # reorder or insert lines there.
  89. { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  90. my ($ileft,$iright,$omask) = map("%g$_",(1..3));
  # Single-DES CBC: des_t4_cbc_encrypt and des_t4_cbc_decrypt.  The whole
  # 16-doubleword schedule is held in %f4..%f34 for the duration of the
  # loop.
  91. $code.=<<___;
  92. .globl des_t4_cbc_encrypt
  93. .align 32
  94. des_t4_cbc_encrypt:
  95. cmp $len, 0
  96. be,pn $::size_t_cc, .Lcbc_abort
  97. srln $len, 0, $len ! needed on v8+, "nop" on v9
  98. ld [$ivec + 0], %f0 ! load ivec
  99. ld [$ivec + 4], %f1
  100. and $inp, 7, $ileft
  101. andn $inp, 7, $inp
  102. sll $ileft, 3, $ileft
  103. mov 0xff, $omask
  104. prefetch [$inp], 20
  105. prefetch [$inp + 63], 20
  106. sub %g0, $ileft, $iright
  107. and $out, 7, %g4
  108. alignaddrl $out, %g0, $out
  109. srl $omask, %g4, $omask
  110. srlx $len, 3, $len
  111. movrz %g4, 0, $omask
  112. prefetch [$out], 22
  113. ldd [$key + 0x00], %f4 ! load key schedule
  114. ldd [$key + 0x08], %f6
  115. ldd [$key + 0x10], %f8
  116. ldd [$key + 0x18], %f10
  117. ldd [$key + 0x20], %f12
  118. ldd [$key + 0x28], %f14
  119. ldd [$key + 0x30], %f16
  120. ldd [$key + 0x38], %f18
  121. ldd [$key + 0x40], %f20
  122. ldd [$key + 0x48], %f22
  123. ldd [$key + 0x50], %f24
  124. ldd [$key + 0x58], %f26
  125. ldd [$key + 0x60], %f28
  126. ldd [$key + 0x68], %f30
  127. ldd [$key + 0x70], %f32
  128. ldd [$key + 0x78], %f34
  129. .Ldes_cbc_enc_loop:
  130. ldx [$inp + 0], %g4
  131. brz,pt $ileft, 4f
  132. nop
  133. ldx [$inp + 8], %g5
  134. sllx %g4, $ileft, %g4
  135. srlx %g5, $iright, %g5
  136. or %g5, %g4, %g4
  137. 4:
  138. movxtod %g4, %f2
  139. prefetch [$inp + 8+63], 20
  140. add $inp, 8, $inp
  141. fxor %f2, %f0, %f0 ! ^= ivec
  142. prefetch [$out + 63], 22
  143. des_ip %f0, %f0
  144. des_round %f4, %f6, %f0, %f0
  145. des_round %f8, %f10, %f0, %f0
  146. des_round %f12, %f14, %f0, %f0
  147. des_round %f16, %f18, %f0, %f0
  148. des_round %f20, %f22, %f0, %f0
  149. des_round %f24, %f26, %f0, %f0
  150. des_round %f28, %f30, %f0, %f0
  151. des_round %f32, %f34, %f0, %f0
  152. des_iip %f0, %f0
  153. brnz,pn $omask, 2f
  154. sub $len, 1, $len
  155. std %f0, [$out + 0]
  156. brnz,pt $len, .Ldes_cbc_enc_loop
  157. add $out, 8, $out
  158. st %f0, [$ivec + 0] ! write out ivec
  159. retl
  160. st %f1, [$ivec + 4]
  161. .Lcbc_abort:
  162. retl
  163. nop
  164. .align 16
  165. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  166. ! and ~4x deterioration
  167. ! in inp==out case
  168. faligndata %f0, %f0, %f2 ! handle unaligned output
  169. stda %f2, [$out + $omask]0xc0 ! partial store
  170. add $out, 8, $out
  171. orn %g0, $omask, $omask
  172. stda %f2, [$out + $omask]0xc0 ! partial store
  173. brnz,pt $len, .Ldes_cbc_enc_loop+4
  174. orn %g0, $omask, $omask
  175. st %f0, [$ivec + 0] ! write out ivec
  176. retl
  177. st %f1, [$ivec + 4]
  178. .type des_t4_cbc_encrypt,#function
  179. .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
  180. .globl des_t4_cbc_decrypt
  181. .align 32
  182. des_t4_cbc_decrypt:
  183. cmp $len, 0
  184. be,pn $::size_t_cc, .Lcbc_abort
  185. srln $len, 0, $len ! needed on v8+, "nop" on v9
  186. ld [$ivec + 0], %f2 ! load ivec
  187. ld [$ivec + 4], %f3
  188. and $inp, 7, $ileft
  189. andn $inp, 7, $inp
  190. sll $ileft, 3, $ileft
  191. mov 0xff, $omask
  192. prefetch [$inp], 20
  193. prefetch [$inp + 63], 20
  194. sub %g0, $ileft, $iright
  195. and $out, 7, %g4
  196. alignaddrl $out, %g0, $out
  197. srl $omask, %g4, $omask
  198. srlx $len, 3, $len
  199. movrz %g4, 0, $omask
  200. prefetch [$out], 22
  201. ldd [$key + 0x78], %f4 ! load key schedule
  202. ldd [$key + 0x70], %f6
  203. ldd [$key + 0x68], %f8
  204. ldd [$key + 0x60], %f10
  205. ldd [$key + 0x58], %f12
  206. ldd [$key + 0x50], %f14
  207. ldd [$key + 0x48], %f16
  208. ldd [$key + 0x40], %f18
  209. ldd [$key + 0x38], %f20
  210. ldd [$key + 0x30], %f22
  211. ldd [$key + 0x28], %f24
  212. ldd [$key + 0x20], %f26
  213. ldd [$key + 0x18], %f28
  214. ldd [$key + 0x10], %f30
  215. ldd [$key + 0x08], %f32
  216. ldd [$key + 0x00], %f34
  217. .Ldes_cbc_dec_loop:
  218. ldx [$inp + 0], %g4
  219. brz,pt $ileft, 4f
  220. nop
  221. ldx [$inp + 8], %g5
  222. sllx %g4, $ileft, %g4
  223. srlx %g5, $iright, %g5
  224. or %g5, %g4, %g4
  225. 4:
  226. movxtod %g4, %f0
  227. prefetch [$inp + 8+63], 20
  228. add $inp, 8, $inp
  229. prefetch [$out + 63], 22
  230. des_ip %f0, %f0
  231. des_round %f4, %f6, %f0, %f0
  232. des_round %f8, %f10, %f0, %f0
  233. des_round %f12, %f14, %f0, %f0
  234. des_round %f16, %f18, %f0, %f0
  235. des_round %f20, %f22, %f0, %f0
  236. des_round %f24, %f26, %f0, %f0
  237. des_round %f28, %f30, %f0, %f0
  238. des_round %f32, %f34, %f0, %f0
  239. des_iip %f0, %f0
  240. fxor %f2, %f0, %f0 ! ^= ivec
  241. movxtod %g4, %f2
  242. brnz,pn $omask, 2f
  243. sub $len, 1, $len
  244. std %f0, [$out + 0]
  245. brnz,pt $len, .Ldes_cbc_dec_loop
  246. add $out, 8, $out
  247. st %f2, [$ivec + 0] ! write out ivec
  248. retl
  249. st %f3, [$ivec + 4]
  250. .align 16
  251. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  252. ! and ~4x deterioration
  253. ! in inp==out case
  254. faligndata %f0, %f0, %f0 ! handle unaligned output
  255. stda %f0, [$out + $omask]0xc0 ! partial store
  256. add $out, 8, $out
  257. orn %g0, $omask, $omask
  258. stda %f0, [$out + $omask]0xc0 ! partial store
  259. brnz,pt $len, .Ldes_cbc_dec_loop+4
  260. orn %g0, $omask, $omask
  261. st %f2, [$ivec + 0] ! write out ivec
  262. retl
  263. st %f3, [$ivec + 4]
  264. .type des_t4_cbc_decrypt,#function
  265. .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
  266. ___
  267. # One might wonder why does one have back-to-back des_iip/des_ip
  268. # pairs between EDE passes. Indeed, aren't they inverse of each other?
  269. # They almost are. Outcome of the pair is 32-bit words being swapped
  270. # in target register. Consider pair of des_iip/des_ip as a way to
  271. # perform the due swap, it's actually fastest way in this case.
  # Triple-DES (EDE3) CBC.  The key area is read as three consecutive
  # 0x80-byte schedules at $key+0x00, $key+0x80 and $key+0x100.
  #   encrypt: schedule 1 in ascending order, schedule 2 in descending
  #            order (0xf8 down to 0x80), schedule 3 in ascending order;
  #   decrypt: schedule 3 in descending order, schedule 2 in ascending
  #            order, schedule 1 in descending order (0x78 down to 0x00).
  # Only the first pass's schedule stays resident in %f4..%f34; the other
  # two are reloaded into %f36..%f62 on every iteration, with the loads
  # interleaved between the rounds of the preceding pass.
  272. $code.=<<___;
  273. .globl des_t4_ede3_cbc_encrypt
  274. .align 32
  275. des_t4_ede3_cbc_encrypt:
  276. cmp $len, 0
  277. be,pn $::size_t_cc, .Lcbc_abort
  278. srln $len, 0, $len ! needed on v8+, "nop" on v9
  279. ld [$ivec + 0], %f0 ! load ivec
  280. ld [$ivec + 4], %f1
  281. and $inp, 7, $ileft
  282. andn $inp, 7, $inp
  283. sll $ileft, 3, $ileft
  284. mov 0xff, $omask
  285. prefetch [$inp], 20
  286. prefetch [$inp + 63], 20
  287. sub %g0, $ileft, $iright
  288. and $out, 7, %g4
  289. alignaddrl $out, %g0, $out
  290. srl $omask, %g4, $omask
  291. srlx $len, 3, $len
  292. movrz %g4, 0, $omask
  293. prefetch [$out], 22
  294. ldd [$key + 0x00], %f4 ! load key schedule
  295. ldd [$key + 0x08], %f6
  296. ldd [$key + 0x10], %f8
  297. ldd [$key + 0x18], %f10
  298. ldd [$key + 0x20], %f12
  299. ldd [$key + 0x28], %f14
  300. ldd [$key + 0x30], %f16
  301. ldd [$key + 0x38], %f18
  302. ldd [$key + 0x40], %f20
  303. ldd [$key + 0x48], %f22
  304. ldd [$key + 0x50], %f24
  305. ldd [$key + 0x58], %f26
  306. ldd [$key + 0x60], %f28
  307. ldd [$key + 0x68], %f30
  308. ldd [$key + 0x70], %f32
  309. ldd [$key + 0x78], %f34
  310. .Ldes_ede3_cbc_enc_loop:
  311. ldx [$inp + 0], %g4
  312. brz,pt $ileft, 4f
  313. nop
  314. ldx [$inp + 8], %g5
  315. sllx %g4, $ileft, %g4
  316. srlx %g5, $iright, %g5
  317. or %g5, %g4, %g4
  318. 4:
  319. movxtod %g4, %f2
  320. prefetch [$inp + 8+63], 20
  321. add $inp, 8, $inp
  322. fxor %f2, %f0, %f0 ! ^= ivec
  323. prefetch [$out + 63], 22
  324. des_ip %f0, %f0
  325. des_round %f4, %f6, %f0, %f0
  326. des_round %f8, %f10, %f0, %f0
  327. des_round %f12, %f14, %f0, %f0
  328. des_round %f16, %f18, %f0, %f0
  329. ldd [$key + 0x100-0x08], %f36
  330. ldd [$key + 0x100-0x10], %f38
  331. des_round %f20, %f22, %f0, %f0
  332. ldd [$key + 0x100-0x18], %f40
  333. ldd [$key + 0x100-0x20], %f42
  334. des_round %f24, %f26, %f0, %f0
  335. ldd [$key + 0x100-0x28], %f44
  336. ldd [$key + 0x100-0x30], %f46
  337. des_round %f28, %f30, %f0, %f0
  338. ldd [$key + 0x100-0x38], %f48
  339. ldd [$key + 0x100-0x40], %f50
  340. des_round %f32, %f34, %f0, %f0
  341. ldd [$key + 0x100-0x48], %f52
  342. ldd [$key + 0x100-0x50], %f54
  343. des_iip %f0, %f0
  344. ldd [$key + 0x100-0x58], %f56
  345. ldd [$key + 0x100-0x60], %f58
  346. des_ip %f0, %f0
  347. ldd [$key + 0x100-0x68], %f60
  348. ldd [$key + 0x100-0x70], %f62
  349. des_round %f36, %f38, %f0, %f0
  350. ldd [$key + 0x100-0x78], %f36
  351. ldd [$key + 0x100-0x80], %f38
  352. des_round %f40, %f42, %f0, %f0
  353. des_round %f44, %f46, %f0, %f0
  354. des_round %f48, %f50, %f0, %f0
  355. ldd [$key + 0x100+0x00], %f40
  356. ldd [$key + 0x100+0x08], %f42
  357. des_round %f52, %f54, %f0, %f0
  358. ldd [$key + 0x100+0x10], %f44
  359. ldd [$key + 0x100+0x18], %f46
  360. des_round %f56, %f58, %f0, %f0
  361. ldd [$key + 0x100+0x20], %f48
  362. ldd [$key + 0x100+0x28], %f50
  363. des_round %f60, %f62, %f0, %f0
  364. ldd [$key + 0x100+0x30], %f52
  365. ldd [$key + 0x100+0x38], %f54
  366. des_round %f36, %f38, %f0, %f0
  367. ldd [$key + 0x100+0x40], %f56
  368. ldd [$key + 0x100+0x48], %f58
  369. des_iip %f0, %f0
  370. ldd [$key + 0x100+0x50], %f60
  371. ldd [$key + 0x100+0x58], %f62
  372. des_ip %f0, %f0
  373. ldd [$key + 0x100+0x60], %f36
  374. ldd [$key + 0x100+0x68], %f38
  375. des_round %f40, %f42, %f0, %f0
  376. ldd [$key + 0x100+0x70], %f40
  377. ldd [$key + 0x100+0x78], %f42
  378. des_round %f44, %f46, %f0, %f0
  379. des_round %f48, %f50, %f0, %f0
  380. des_round %f52, %f54, %f0, %f0
  381. des_round %f56, %f58, %f0, %f0
  382. des_round %f60, %f62, %f0, %f0
  383. des_round %f36, %f38, %f0, %f0
  384. des_round %f40, %f42, %f0, %f0
  385. des_iip %f0, %f0
  386. brnz,pn $omask, 2f
  387. sub $len, 1, $len
  388. std %f0, [$out + 0]
  389. brnz,pt $len, .Ldes_ede3_cbc_enc_loop
  390. add $out, 8, $out
  391. st %f0, [$ivec + 0] ! write out ivec
  392. retl
  393. st %f1, [$ivec + 4]
  394. .align 16
  395. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  396. ! and ~2x deterioration
  397. ! in inp==out case
  398. faligndata %f0, %f0, %f2 ! handle unaligned output
  399. stda %f2, [$out + $omask]0xc0 ! partial store
  400. add $out, 8, $out
  401. orn %g0, $omask, $omask
  402. stda %f2, [$out + $omask]0xc0 ! partial store
  403. brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
  404. orn %g0, $omask, $omask
  405. st %f0, [$ivec + 0] ! write out ivec
  406. retl
  407. st %f1, [$ivec + 4]
  408. .type des_t4_ede3_cbc_encrypt,#function
  409. .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
  410. .globl des_t4_ede3_cbc_decrypt
  411. .align 32
  412. des_t4_ede3_cbc_decrypt:
  413. cmp $len, 0
  414. be,pn $::size_t_cc, .Lcbc_abort
  415. srln $len, 0, $len ! needed on v8+, "nop" on v9
  416. ld [$ivec + 0], %f2 ! load ivec
  417. ld [$ivec + 4], %f3
  418. and $inp, 7, $ileft
  419. andn $inp, 7, $inp
  420. sll $ileft, 3, $ileft
  421. mov 0xff, $omask
  422. prefetch [$inp], 20
  423. prefetch [$inp + 63], 20
  424. sub %g0, $ileft, $iright
  425. and $out, 7, %g4
  426. alignaddrl $out, %g0, $out
  427. srl $omask, %g4, $omask
  428. srlx $len, 3, $len
  429. movrz %g4, 0, $omask
  430. prefetch [$out], 22
  431. ldd [$key + 0x100+0x78], %f4 ! load key schedule
  432. ldd [$key + 0x100+0x70], %f6
  433. ldd [$key + 0x100+0x68], %f8
  434. ldd [$key + 0x100+0x60], %f10
  435. ldd [$key + 0x100+0x58], %f12
  436. ldd [$key + 0x100+0x50], %f14
  437. ldd [$key + 0x100+0x48], %f16
  438. ldd [$key + 0x100+0x40], %f18
  439. ldd [$key + 0x100+0x38], %f20
  440. ldd [$key + 0x100+0x30], %f22
  441. ldd [$key + 0x100+0x28], %f24
  442. ldd [$key + 0x100+0x20], %f26
  443. ldd [$key + 0x100+0x18], %f28
  444. ldd [$key + 0x100+0x10], %f30
  445. ldd [$key + 0x100+0x08], %f32
  446. ldd [$key + 0x100+0x00], %f34
  447. .Ldes_ede3_cbc_dec_loop:
  448. ldx [$inp + 0], %g4
  449. brz,pt $ileft, 4f
  450. nop
  451. ldx [$inp + 8], %g5
  452. sllx %g4, $ileft, %g4
  453. srlx %g5, $iright, %g5
  454. or %g5, %g4, %g4
  455. 4:
  456. movxtod %g4, %f0
  457. prefetch [$inp + 8+63], 20
  458. add $inp, 8, $inp
  459. prefetch [$out + 63], 22
  460. des_ip %f0, %f0
  461. des_round %f4, %f6, %f0, %f0
  462. des_round %f8, %f10, %f0, %f0
  463. des_round %f12, %f14, %f0, %f0
  464. des_round %f16, %f18, %f0, %f0
  465. ldd [$key + 0x80+0x00], %f36
  466. ldd [$key + 0x80+0x08], %f38
  467. des_round %f20, %f22, %f0, %f0
  468. ldd [$key + 0x80+0x10], %f40
  469. ldd [$key + 0x80+0x18], %f42
  470. des_round %f24, %f26, %f0, %f0
  471. ldd [$key + 0x80+0x20], %f44
  472. ldd [$key + 0x80+0x28], %f46
  473. des_round %f28, %f30, %f0, %f0
  474. ldd [$key + 0x80+0x30], %f48
  475. ldd [$key + 0x80+0x38], %f50
  476. des_round %f32, %f34, %f0, %f0
  477. ldd [$key + 0x80+0x40], %f52
  478. ldd [$key + 0x80+0x48], %f54
  479. des_iip %f0, %f0
  480. ldd [$key + 0x80+0x50], %f56
  481. ldd [$key + 0x80+0x58], %f58
  482. des_ip %f0, %f0
  483. ldd [$key + 0x80+0x60], %f60
  484. ldd [$key + 0x80+0x68], %f62
  485. des_round %f36, %f38, %f0, %f0
  486. ldd [$key + 0x80+0x70], %f36
  487. ldd [$key + 0x80+0x78], %f38
  488. des_round %f40, %f42, %f0, %f0
  489. des_round %f44, %f46, %f0, %f0
  490. des_round %f48, %f50, %f0, %f0
  491. ldd [$key + 0x80-0x08], %f40
  492. ldd [$key + 0x80-0x10], %f42
  493. des_round %f52, %f54, %f0, %f0
  494. ldd [$key + 0x80-0x18], %f44
  495. ldd [$key + 0x80-0x20], %f46
  496. des_round %f56, %f58, %f0, %f0
  497. ldd [$key + 0x80-0x28], %f48
  498. ldd [$key + 0x80-0x30], %f50
  499. des_round %f60, %f62, %f0, %f0
  500. ldd [$key + 0x80-0x38], %f52
  501. ldd [$key + 0x80-0x40], %f54
  502. des_round %f36, %f38, %f0, %f0
  503. ldd [$key + 0x80-0x48], %f56
  504. ldd [$key + 0x80-0x50], %f58
  505. des_iip %f0, %f0
  506. ldd [$key + 0x80-0x58], %f60
  507. ldd [$key + 0x80-0x60], %f62
  508. des_ip %f0, %f0
  509. ldd [$key + 0x80-0x68], %f36
  510. ldd [$key + 0x80-0x70], %f38
  511. des_round %f40, %f42, %f0, %f0
  512. ldd [$key + 0x80-0x78], %f40
  513. ldd [$key + 0x80-0x80], %f42
  514. des_round %f44, %f46, %f0, %f0
  515. des_round %f48, %f50, %f0, %f0
  516. des_round %f52, %f54, %f0, %f0
  517. des_round %f56, %f58, %f0, %f0
  518. des_round %f60, %f62, %f0, %f0
  519. des_round %f36, %f38, %f0, %f0
  520. des_round %f40, %f42, %f0, %f0
  521. des_iip %f0, %f0
  522. fxor %f2, %f0, %f0 ! ^= ivec
  523. movxtod %g4, %f2
  524. brnz,pn $omask, 2f
  525. sub $len, 1, $len
  526. std %f0, [$out + 0]
  527. brnz,pt $len, .Ldes_ede3_cbc_dec_loop
  528. add $out, 8, $out
  529. st %f2, [$ivec + 0] ! write out ivec
  530. retl
  531. st %f3, [$ivec + 4]
  532. .align 16
  533. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  534. ! and ~3x deterioration
  535. ! in inp==out case
  536. faligndata %f0, %f0, %f0 ! handle unaligned output
  537. stda %f0, [$out + $omask]0xc0 ! partial store
  538. add $out, 8, $out
  539. orn %g0, $omask, $omask
  540. stda %f0, [$out + $omask]0xc0 ! partial store
  541. brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
  542. orn %g0, $omask, $omask
  543. st %f2, [$ivec + 0] ! write out ivec
  544. retl
  545. st %f3, [$ivec + 4]
  546. .type des_t4_ede3_cbc_decrypt,#function
  547. .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
  548. ___
  549. }
  # Trailer: append an identification string (.asciz) and realign to a
  # 4-byte boundary.
  550. $code.=<<___;
  551. .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
  552. .align 4
  553. ___
  # emit_assembler() is not defined in this file; presumably it comes
  # from sparcv9_modes.pl and turns the accumulated $code into the final
  # output on STDOUT -- TODO confirm.
  554. &emit_assembler();
  # Checking close is deliberate: buffered write errors (e.g. a full
  # disk) only surface here, and the script must fail rather than leave a
  # truncated output file behind.
  555. close STDOUT or die "error closing STDOUT: $!";