dest4-sparcv9.pl 15 KB


  1. #! /usr/bin/env perl
  2. # Copyright 2013-2021 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by David S. Miller and Andy Polyakov.
  10. # The module is licensed under 2-clause BSD
  11. # license. March 2013. All rights reserved.
  12. # ====================================================================
  13. ######################################################################
  14. # DES for SPARC T4.
  15. #
  16. # As with other hardware-assisted ciphers CBC encrypt results [for
  17. # aligned data] are virtually identical to critical path lengths:
  18. #
  19. #               DES             Triple-DES
  20. # CBC encrypt   4.14/4.15(*)    11.7/11.7
  21. # CBC decrypt   1.77/4.11(**)   6.42/7.47
  22. #
  23. # (*)  numbers after the slash are for
  24. #      misaligned data;
  25. # (**) this is the result for the largest
  26. #      block size; unlike all other
  27. #      cases, results for smaller blocks
  28. #      are better[?];
  # Work out the directory this script lives in (everything up to the last
  # '/' or '\' of $0) so the helper module can be found regardless of the
  # current working directory. When $0 has no directory part $dir stays
  # undefined and the second @INC entry is simply "../../perlasm".
  29. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  30. push(@INC,"${dir}","${dir}../../perlasm");
  31. require "sparcv9_modes.pl";
  # The last command-line argument, if present, names the output file and
  # STDOUT is redirected to it. The three-argument form of open keeps the
  # name from ever being interpreted as a mode, and the script now fails
  # loudly when the file cannot be opened instead of silently writing the
  # generated assembly to the previous STDOUT. The parentheses matter: with
  # no argument at all the script must still fall through and print to STDOUT.
  32. $output=pop and (open(STDOUT,">",$output) or die "can't open $output: $!");
  # Common prologue of the generated assembly. Inside the here-document the
  # lines beginning with '#' are C preprocessor directives that become part
  # of the output; they are not Perl comments. The prologue makes sure
  # __ASSEMBLER__ is defined, includes crypto/sparc_arch.h, declares %g2 and
  # %g3 as scratch registers when __arch64__ is defined, and selects .text.
  33. $code.=<<___;
  34. #ifndef __ASSEMBLER__
  35. # define __ASSEMBLER__ 1
  36. #endif
  37. #include "crypto/sparc_arch.h"
  38. #ifdef __arch64__
  39. .register %g2,#scratch
  40. .register %g3,#scratch
  41. #endif
  42. .text
  43. ___
  # des_t4_key_expand: generated routine that takes the key pointer in %o0
  # ($inp) and the key-schedule pointer in %o1 ($out).
  #
  # It loads one 8-byte key, using alignaddr/faligndata to cope with a source
  # address that is not 8-byte aligned, and stores the 16 doublewords produced
  # by the des_kexpand chain at $out+0x00 .. $out+0x78 (128 bytes in total).
  # The stores are interleaved with the expansion steps, so they are not
  # issued in ascending address order.
  #
  # Mind the SPARC delay slots: the first ldd executes whether or not bz,pt
  # is taken, and the final std executes together with retl.
  44. { my ($inp,$out)=("%o0","%o1");
  45. $code.=<<___;
  46. .align 32
  47. .globl des_t4_key_expand
  48. .type des_t4_key_expand,#function
  49. des_t4_key_expand:
  50. andcc $inp, 0x7, %g0
  51. alignaddr $inp, %g0, $inp
  52. bz,pt %icc, 1f
  53. ldd [$inp + 0x00], %f0
  54. ldd [$inp + 0x08], %f2
  55. faligndata %f0, %f2, %f0
  56. 1: des_kexpand %f0, 0, %f0
  57. des_kexpand %f0, 1, %f2
  58. std %f0, [$out + 0x00]
  59. des_kexpand %f2, 3, %f6
  60. std %f2, [$out + 0x08]
  61. des_kexpand %f2, 2, %f4
  62. des_kexpand %f6, 3, %f10
  63. std %f6, [$out + 0x18]
  64. des_kexpand %f6, 2, %f8
  65. std %f4, [$out + 0x10]
  66. des_kexpand %f10, 3, %f14
  67. std %f10, [$out + 0x28]
  68. des_kexpand %f10, 2, %f12
  69. std %f8, [$out + 0x20]
  70. des_kexpand %f14, 1, %f16
  71. std %f14, [$out + 0x38]
  72. des_kexpand %f16, 3, %f20
  73. std %f12, [$out + 0x30]
  74. des_kexpand %f16, 2, %f18
  75. std %f16, [$out + 0x40]
  76. des_kexpand %f20, 3, %f24
  77. std %f20, [$out + 0x50]
  78. des_kexpand %f20, 2, %f22
  79. std %f18, [$out + 0x48]
  80. des_kexpand %f24, 3, %f28
  81. std %f24, [$out + 0x60]
  82. des_kexpand %f24, 2, %f26
  83. std %f22, [$out + 0x58]
  84. des_kexpand %f28, 1, %f30
  85. std %f28, [$out + 0x70]
  86. std %f26, [$out + 0x68]
  87. retl
  88. std %f30, [$out + 0x78]
  89. .size des_t4_key_expand,.-des_t4_key_expand
  90. ___
  91. }
  # CBC-mode routines. All four generated functions share one register
  # assignment: %o0..%o4 are $inp, $out, $len, $key and $ivec, and %g1..%g3
  # are $ileft, $iright and $omask.
  #
  # Common structure, as visible in the assembly below:
  #  - $len == 0 branches to .Lcbc_abort, a label that is defined once in
  #    des_t4_cbc_encrypt and reused by the other three functions; otherwise
  #    $len is shifted right by 3, i.e. turned into a count of 8-byte blocks;
  #  - $inp is rounded down to an 8-byte boundary, and $ileft/$iright hold
  #    the bit shifts used to splice a misaligned block out of two ldx loads;
  #  - $omask is zero for 8-byte aligned output (plain std); otherwise it is
  #    the byte mask for the pair of partial stores (stda ... 0xc0) at "2:";
  #  - the "2:" path loads the next input word into %g4 itself, so it
  #    re-enters the loop at <loop label>+4, skipping the ldx that sits at
  #    the loop label;
  #  - the IV is read from $ivec as two 32-bit words and written back the
  #    same way before returning.
  92. { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  93. my ($ileft,$iright,$omask) = map("%g$_",(1..3));
  # Single DES. Encryption loads the 16 schedule doublewords from $key+0x00
  # upwards; decryption loads them from $key+0x78 downwards, i.e. the same
  # schedule in reverse order. Encryption XORs the IV into the input before
  # the rounds and keeps the output block as the next IV; decryption XORs the
  # IV into the output after the rounds and keeps the input block (still in
  # %g4 at that point) as the next IV.
  94. $code.=<<___;
  95. .globl des_t4_cbc_encrypt
  96. .align 32
  97. des_t4_cbc_encrypt:
  98. cmp $len, 0
  99. be,pn $::size_t_cc, .Lcbc_abort
  100. srln $len, 0, $len ! needed on v8+, "nop" on v9
  101. ld [$ivec + 0], %f0 ! load ivec
  102. ld [$ivec + 4], %f1
  103. and $inp, 7, $ileft
  104. andn $inp, 7, $inp
  105. sll $ileft, 3, $ileft
  106. mov 0xff, $omask
  107. prefetch [$inp], 20
  108. prefetch [$inp + 63], 20
  109. sub %g0, $ileft, $iright
  110. and $out, 7, %g4
  111. alignaddrl $out, %g0, $out
  112. srl $omask, %g4, $omask
  113. srlx $len, 3, $len
  114. movrz %g4, 0, $omask
  115. prefetch [$out], 22
  116. ldd [$key + 0x00], %f4 ! load key schedule
  117. ldd [$key + 0x08], %f6
  118. ldd [$key + 0x10], %f8
  119. ldd [$key + 0x18], %f10
  120. ldd [$key + 0x20], %f12
  121. ldd [$key + 0x28], %f14
  122. ldd [$key + 0x30], %f16
  123. ldd [$key + 0x38], %f18
  124. ldd [$key + 0x40], %f20
  125. ldd [$key + 0x48], %f22
  126. ldd [$key + 0x50], %f24
  127. ldd [$key + 0x58], %f26
  128. ldd [$key + 0x60], %f28
  129. ldd [$key + 0x68], %f30
  130. ldd [$key + 0x70], %f32
  131. ldd [$key + 0x78], %f34
  132. .Ldes_cbc_enc_loop:
  133. ldx [$inp + 0], %g4
  134. brz,pt $ileft, 4f
  135. nop
  136. ldx [$inp + 8], %g5
  137. sllx %g4, $ileft, %g4
  138. srlx %g5, $iright, %g5
  139. or %g5, %g4, %g4
  140. 4:
  141. movxtod %g4, %f2
  142. prefetch [$inp + 8+63], 20
  143. add $inp, 8, $inp
  144. fxor %f2, %f0, %f0 ! ^= ivec
  145. prefetch [$out + 63], 22
  146. des_ip %f0, %f0
  147. des_round %f4, %f6, %f0, %f0
  148. des_round %f8, %f10, %f0, %f0
  149. des_round %f12, %f14, %f0, %f0
  150. des_round %f16, %f18, %f0, %f0
  151. des_round %f20, %f22, %f0, %f0
  152. des_round %f24, %f26, %f0, %f0
  153. des_round %f28, %f30, %f0, %f0
  154. des_round %f32, %f34, %f0, %f0
  155. des_iip %f0, %f0
  156. brnz,pn $omask, 2f
  157. sub $len, 1, $len
  158. std %f0, [$out + 0]
  159. brnz,pt $len, .Ldes_cbc_enc_loop
  160. add $out, 8, $out
  161. st %f0, [$ivec + 0] ! write out ivec
  162. retl
  163. st %f1, [$ivec + 4]
  164. .Lcbc_abort:
  165. retl
  166. nop
  167. .align 16
  168. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  169. ! and ~4x deterioration
  170. ! in inp==out case
  171. faligndata %f0, %f0, %f2 ! handle unaligned output
  172. stda %f2, [$out + $omask]0xc0 ! partial store
  173. add $out, 8, $out
  174. orn %g0, $omask, $omask
  175. stda %f2, [$out + $omask]0xc0 ! partial store
  176. brnz,pt $len, .Ldes_cbc_enc_loop+4
  177. orn %g0, $omask, $omask
  178. st %f0, [$ivec + 0] ! write out ivec
  179. retl
  180. st %f1, [$ivec + 4]
  181. .type des_t4_cbc_encrypt,#function
  182. .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
  183. .globl des_t4_cbc_decrypt
  184. .align 32
  185. des_t4_cbc_decrypt:
  186. cmp $len, 0
  187. be,pn $::size_t_cc, .Lcbc_abort
  188. srln $len, 0, $len ! needed on v8+, "nop" on v9
  189. ld [$ivec + 0], %f2 ! load ivec
  190. ld [$ivec + 4], %f3
  191. and $inp, 7, $ileft
  192. andn $inp, 7, $inp
  193. sll $ileft, 3, $ileft
  194. mov 0xff, $omask
  195. prefetch [$inp], 20
  196. prefetch [$inp + 63], 20
  197. sub %g0, $ileft, $iright
  198. and $out, 7, %g4
  199. alignaddrl $out, %g0, $out
  200. srl $omask, %g4, $omask
  201. srlx $len, 3, $len
  202. movrz %g4, 0, $omask
  203. prefetch [$out], 22
  204. ldd [$key + 0x78], %f4 ! load key schedule
  205. ldd [$key + 0x70], %f6
  206. ldd [$key + 0x68], %f8
  207. ldd [$key + 0x60], %f10
  208. ldd [$key + 0x58], %f12
  209. ldd [$key + 0x50], %f14
  210. ldd [$key + 0x48], %f16
  211. ldd [$key + 0x40], %f18
  212. ldd [$key + 0x38], %f20
  213. ldd [$key + 0x30], %f22
  214. ldd [$key + 0x28], %f24
  215. ldd [$key + 0x20], %f26
  216. ldd [$key + 0x18], %f28
  217. ldd [$key + 0x10], %f30
  218. ldd [$key + 0x08], %f32
  219. ldd [$key + 0x00], %f34
  220. .Ldes_cbc_dec_loop:
  221. ldx [$inp + 0], %g4
  222. brz,pt $ileft, 4f
  223. nop
  224. ldx [$inp + 8], %g5
  225. sllx %g4, $ileft, %g4
  226. srlx %g5, $iright, %g5
  227. or %g5, %g4, %g4
  228. 4:
  229. movxtod %g4, %f0
  230. prefetch [$inp + 8+63], 20
  231. add $inp, 8, $inp
  232. prefetch [$out + 63], 22
  233. des_ip %f0, %f0
  234. des_round %f4, %f6, %f0, %f0
  235. des_round %f8, %f10, %f0, %f0
  236. des_round %f12, %f14, %f0, %f0
  237. des_round %f16, %f18, %f0, %f0
  238. des_round %f20, %f22, %f0, %f0
  239. des_round %f24, %f26, %f0, %f0
  240. des_round %f28, %f30, %f0, %f0
  241. des_round %f32, %f34, %f0, %f0
  242. des_iip %f0, %f0
  243. fxor %f2, %f0, %f0 ! ^= ivec
  244. movxtod %g4, %f2
  245. brnz,pn $omask, 2f
  246. sub $len, 1, $len
  247. std %f0, [$out + 0]
  248. brnz,pt $len, .Ldes_cbc_dec_loop
  249. add $out, 8, $out
  250. st %f2, [$ivec + 0] ! write out ivec
  251. retl
  252. st %f3, [$ivec + 4]
  253. .align 16
  254. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  255. ! and ~4x deterioration
  256. ! in inp==out case
  257. faligndata %f0, %f0, %f0 ! handle unaligned output
  258. stda %f0, [$out + $omask]0xc0 ! partial store
  259. add $out, 8, $out
  260. orn %g0, $omask, $omask
  261. stda %f0, [$out + $omask]0xc0 ! partial store
  262. brnz,pt $len, .Ldes_cbc_dec_loop+4
  263. orn %g0, $omask, $omask
  264. st %f2, [$ivec + 0] ! write out ivec
  265. retl
  266. st %f3, [$ivec + 4]
  267. .type des_t4_cbc_decrypt,#function
  268. .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
  269. ___
  270. # One might wonder why does one have back-to-back des_iip/des_ip
  271. # pairs between EDE passes. Indeed, aren't they inverse of each other?
  272. # They almost are. Outcome of the pair is 32-bit words being swapped
  273. # in target register. Consider pair of des_iip/des_ip as a way to
  274. # perform the due swap, it's actually fastest way in this case.
  # Triple DES (EDE3). The schedule is read as three 0x80-byte parts at
  # $key+0x000, $key+0x080 and $key+0x100. Encryption walks the first part
  # upwards, the second downwards (0x100-0x08 .. 0x100-0x80) and the third
  # upwards; decryption walks the third downwards, the second upwards and the
  # first downwards. Only the keys of the pass that runs first stay resident
  # in %f4-%f34; the keys for the other two passes are reloaded into
  # %f36-%f62 on every iteration, interleaved with the rounds.
  275. $code.=<<___;
  276. .globl des_t4_ede3_cbc_encrypt
  277. .align 32
  278. des_t4_ede3_cbc_encrypt:
  279. cmp $len, 0
  280. be,pn $::size_t_cc, .Lcbc_abort
  281. srln $len, 0, $len ! needed on v8+, "nop" on v9
  282. ld [$ivec + 0], %f0 ! load ivec
  283. ld [$ivec + 4], %f1
  284. and $inp, 7, $ileft
  285. andn $inp, 7, $inp
  286. sll $ileft, 3, $ileft
  287. mov 0xff, $omask
  288. prefetch [$inp], 20
  289. prefetch [$inp + 63], 20
  290. sub %g0, $ileft, $iright
  291. and $out, 7, %g4
  292. alignaddrl $out, %g0, $out
  293. srl $omask, %g4, $omask
  294. srlx $len, 3, $len
  295. movrz %g4, 0, $omask
  296. prefetch [$out], 22
  297. ldd [$key + 0x00], %f4 ! load key schedule
  298. ldd [$key + 0x08], %f6
  299. ldd [$key + 0x10], %f8
  300. ldd [$key + 0x18], %f10
  301. ldd [$key + 0x20], %f12
  302. ldd [$key + 0x28], %f14
  303. ldd [$key + 0x30], %f16
  304. ldd [$key + 0x38], %f18
  305. ldd [$key + 0x40], %f20
  306. ldd [$key + 0x48], %f22
  307. ldd [$key + 0x50], %f24
  308. ldd [$key + 0x58], %f26
  309. ldd [$key + 0x60], %f28
  310. ldd [$key + 0x68], %f30
  311. ldd [$key + 0x70], %f32
  312. ldd [$key + 0x78], %f34
  313. .Ldes_ede3_cbc_enc_loop:
  314. ldx [$inp + 0], %g4
  315. brz,pt $ileft, 4f
  316. nop
  317. ldx [$inp + 8], %g5
  318. sllx %g4, $ileft, %g4
  319. srlx %g5, $iright, %g5
  320. or %g5, %g4, %g4
  321. 4:
  322. movxtod %g4, %f2
  323. prefetch [$inp + 8+63], 20
  324. add $inp, 8, $inp
  325. fxor %f2, %f0, %f0 ! ^= ivec
  326. prefetch [$out + 63], 22
  327. des_ip %f0, %f0
  328. des_round %f4, %f6, %f0, %f0
  329. des_round %f8, %f10, %f0, %f0
  330. des_round %f12, %f14, %f0, %f0
  331. des_round %f16, %f18, %f0, %f0
  332. ldd [$key + 0x100-0x08], %f36
  333. ldd [$key + 0x100-0x10], %f38
  334. des_round %f20, %f22, %f0, %f0
  335. ldd [$key + 0x100-0x18], %f40
  336. ldd [$key + 0x100-0x20], %f42
  337. des_round %f24, %f26, %f0, %f0
  338. ldd [$key + 0x100-0x28], %f44
  339. ldd [$key + 0x100-0x30], %f46
  340. des_round %f28, %f30, %f0, %f0
  341. ldd [$key + 0x100-0x38], %f48
  342. ldd [$key + 0x100-0x40], %f50
  343. des_round %f32, %f34, %f0, %f0
  344. ldd [$key + 0x100-0x48], %f52
  345. ldd [$key + 0x100-0x50], %f54
  346. des_iip %f0, %f0
  347. ldd [$key + 0x100-0x58], %f56
  348. ldd [$key + 0x100-0x60], %f58
  349. des_ip %f0, %f0
  350. ldd [$key + 0x100-0x68], %f60
  351. ldd [$key + 0x100-0x70], %f62
  352. des_round %f36, %f38, %f0, %f0
  353. ldd [$key + 0x100-0x78], %f36
  354. ldd [$key + 0x100-0x80], %f38
  355. des_round %f40, %f42, %f0, %f0
  356. des_round %f44, %f46, %f0, %f0
  357. des_round %f48, %f50, %f0, %f0
  358. ldd [$key + 0x100+0x00], %f40
  359. ldd [$key + 0x100+0x08], %f42
  360. des_round %f52, %f54, %f0, %f0
  361. ldd [$key + 0x100+0x10], %f44
  362. ldd [$key + 0x100+0x18], %f46
  363. des_round %f56, %f58, %f0, %f0
  364. ldd [$key + 0x100+0x20], %f48
  365. ldd [$key + 0x100+0x28], %f50
  366. des_round %f60, %f62, %f0, %f0
  367. ldd [$key + 0x100+0x30], %f52
  368. ldd [$key + 0x100+0x38], %f54
  369. des_round %f36, %f38, %f0, %f0
  370. ldd [$key + 0x100+0x40], %f56
  371. ldd [$key + 0x100+0x48], %f58
  372. des_iip %f0, %f0
  373. ldd [$key + 0x100+0x50], %f60
  374. ldd [$key + 0x100+0x58], %f62
  375. des_ip %f0, %f0
  376. ldd [$key + 0x100+0x60], %f36
  377. ldd [$key + 0x100+0x68], %f38
  378. des_round %f40, %f42, %f0, %f0
  379. ldd [$key + 0x100+0x70], %f40
  380. ldd [$key + 0x100+0x78], %f42
  381. des_round %f44, %f46, %f0, %f0
  382. des_round %f48, %f50, %f0, %f0
  383. des_round %f52, %f54, %f0, %f0
  384. des_round %f56, %f58, %f0, %f0
  385. des_round %f60, %f62, %f0, %f0
  386. des_round %f36, %f38, %f0, %f0
  387. des_round %f40, %f42, %f0, %f0
  388. des_iip %f0, %f0
  389. brnz,pn $omask, 2f
  390. sub $len, 1, $len
  391. std %f0, [$out + 0]
  392. brnz,pt $len, .Ldes_ede3_cbc_enc_loop
  393. add $out, 8, $out
  394. st %f0, [$ivec + 0] ! write out ivec
  395. retl
  396. st %f1, [$ivec + 4]
  397. .align 16
  398. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  399. ! and ~2x deterioration
  400. ! in inp==out case
  401. faligndata %f0, %f0, %f2 ! handle unaligned output
  402. stda %f2, [$out + $omask]0xc0 ! partial store
  403. add $out, 8, $out
  404. orn %g0, $omask, $omask
  405. stda %f2, [$out + $omask]0xc0 ! partial store
  406. brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
  407. orn %g0, $omask, $omask
  408. st %f0, [$ivec + 0] ! write out ivec
  409. retl
  410. st %f1, [$ivec + 4]
  411. .type des_t4_ede3_cbc_encrypt,#function
  412. .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
  413. .globl des_t4_ede3_cbc_decrypt
  414. .align 32
  415. des_t4_ede3_cbc_decrypt:
  416. cmp $len, 0
  417. be,pn $::size_t_cc, .Lcbc_abort
  418. srln $len, 0, $len ! needed on v8+, "nop" on v9
  419. ld [$ivec + 0], %f2 ! load ivec
  420. ld [$ivec + 4], %f3
  421. and $inp, 7, $ileft
  422. andn $inp, 7, $inp
  423. sll $ileft, 3, $ileft
  424. mov 0xff, $omask
  425. prefetch [$inp], 20
  426. prefetch [$inp + 63], 20
  427. sub %g0, $ileft, $iright
  428. and $out, 7, %g4
  429. alignaddrl $out, %g0, $out
  430. srl $omask, %g4, $omask
  431. srlx $len, 3, $len
  432. movrz %g4, 0, $omask
  433. prefetch [$out], 22
  434. ldd [$key + 0x100+0x78], %f4 ! load key schedule
  435. ldd [$key + 0x100+0x70], %f6
  436. ldd [$key + 0x100+0x68], %f8
  437. ldd [$key + 0x100+0x60], %f10
  438. ldd [$key + 0x100+0x58], %f12
  439. ldd [$key + 0x100+0x50], %f14
  440. ldd [$key + 0x100+0x48], %f16
  441. ldd [$key + 0x100+0x40], %f18
  442. ldd [$key + 0x100+0x38], %f20
  443. ldd [$key + 0x100+0x30], %f22
  444. ldd [$key + 0x100+0x28], %f24
  445. ldd [$key + 0x100+0x20], %f26
  446. ldd [$key + 0x100+0x18], %f28
  447. ldd [$key + 0x100+0x10], %f30
  448. ldd [$key + 0x100+0x08], %f32
  449. ldd [$key + 0x100+0x00], %f34
  450. .Ldes_ede3_cbc_dec_loop:
  451. ldx [$inp + 0], %g4
  452. brz,pt $ileft, 4f
  453. nop
  454. ldx [$inp + 8], %g5
  455. sllx %g4, $ileft, %g4
  456. srlx %g5, $iright, %g5
  457. or %g5, %g4, %g4
  458. 4:
  459. movxtod %g4, %f0
  460. prefetch [$inp + 8+63], 20
  461. add $inp, 8, $inp
  462. prefetch [$out + 63], 22
  463. des_ip %f0, %f0
  464. des_round %f4, %f6, %f0, %f0
  465. des_round %f8, %f10, %f0, %f0
  466. des_round %f12, %f14, %f0, %f0
  467. des_round %f16, %f18, %f0, %f0
  468. ldd [$key + 0x80+0x00], %f36
  469. ldd [$key + 0x80+0x08], %f38
  470. des_round %f20, %f22, %f0, %f0
  471. ldd [$key + 0x80+0x10], %f40
  472. ldd [$key + 0x80+0x18], %f42
  473. des_round %f24, %f26, %f0, %f0
  474. ldd [$key + 0x80+0x20], %f44
  475. ldd [$key + 0x80+0x28], %f46
  476. des_round %f28, %f30, %f0, %f0
  477. ldd [$key + 0x80+0x30], %f48
  478. ldd [$key + 0x80+0x38], %f50
  479. des_round %f32, %f34, %f0, %f0
  480. ldd [$key + 0x80+0x40], %f52
  481. ldd [$key + 0x80+0x48], %f54
  482. des_iip %f0, %f0
  483. ldd [$key + 0x80+0x50], %f56
  484. ldd [$key + 0x80+0x58], %f58
  485. des_ip %f0, %f0
  486. ldd [$key + 0x80+0x60], %f60
  487. ldd [$key + 0x80+0x68], %f62
  488. des_round %f36, %f38, %f0, %f0
  489. ldd [$key + 0x80+0x70], %f36
  490. ldd [$key + 0x80+0x78], %f38
  491. des_round %f40, %f42, %f0, %f0
  492. des_round %f44, %f46, %f0, %f0
  493. des_round %f48, %f50, %f0, %f0
  494. ldd [$key + 0x80-0x08], %f40
  495. ldd [$key + 0x80-0x10], %f42
  496. des_round %f52, %f54, %f0, %f0
  497. ldd [$key + 0x80-0x18], %f44
  498. ldd [$key + 0x80-0x20], %f46
  499. des_round %f56, %f58, %f0, %f0
  500. ldd [$key + 0x80-0x28], %f48
  501. ldd [$key + 0x80-0x30], %f50
  502. des_round %f60, %f62, %f0, %f0
  503. ldd [$key + 0x80-0x38], %f52
  504. ldd [$key + 0x80-0x40], %f54
  505. des_round %f36, %f38, %f0, %f0
  506. ldd [$key + 0x80-0x48], %f56
  507. ldd [$key + 0x80-0x50], %f58
  508. des_iip %f0, %f0
  509. ldd [$key + 0x80-0x58], %f60
  510. ldd [$key + 0x80-0x60], %f62
  511. des_ip %f0, %f0
  512. ldd [$key + 0x80-0x68], %f36
  513. ldd [$key + 0x80-0x70], %f38
  514. des_round %f40, %f42, %f0, %f0
  515. ldd [$key + 0x80-0x78], %f40
  516. ldd [$key + 0x80-0x80], %f42
  517. des_round %f44, %f46, %f0, %f0
  518. des_round %f48, %f50, %f0, %f0
  519. des_round %f52, %f54, %f0, %f0
  520. des_round %f56, %f58, %f0, %f0
  521. des_round %f60, %f62, %f0, %f0
  522. des_round %f36, %f38, %f0, %f0
  523. des_round %f40, %f42, %f0, %f0
  524. des_iip %f0, %f0
  525. fxor %f2, %f0, %f0 ! ^= ivec
  526. movxtod %g4, %f2
  527. brnz,pn $omask, 2f
  528. sub $len, 1, $len
  529. std %f0, [$out + 0]
  530. brnz,pt $len, .Ldes_ede3_cbc_dec_loop
  531. add $out, 8, $out
  532. st %f2, [$ivec + 0] ! write out ivec
  533. retl
  534. st %f3, [$ivec + 4]
  535. .align 16
  536. 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
  537. ! and ~3x deterioration
  538. ! in inp==out case
  539. faligndata %f0, %f0, %f0 ! handle unaligned output
  540. stda %f0, [$out + $omask]0xc0 ! partial store
  541. add $out, 8, $out
  542. orn %g0, $omask, $omask
  543. stda %f0, [$out + $omask]0xc0 ! partial store
  544. brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
  545. orn %g0, $omask, $omask
  546. st %f2, [$ivec + 0] ! write out ivec
  547. retl
  548. st %f3, [$ivec + 4]
  549. .type des_t4_ede3_cbc_decrypt,#function
  550. .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
  551. ___
  552. }
  # Append the identification string and a final alignment directive, then
  # hand the accumulated $code to emit_assembler(). That routine is not
  # defined in this file; presumably it comes from sparcv9_modes.pl, which is
  # required at the top (TODO confirm). Closing STDOUT explicitly and dying
  # on failure reports write errors that would otherwise go unnoticed.
  553. $code.=<<___;
  554. .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
  555. .align 4
  556. ___
  557. &emit_assembler();
  558. close STDOUT or die "error closing STDOUT: $!";