#! /usr/bin/env perl
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#               DES             Triple-DES
# CBC encrypt   4.14/4.15(*)    11.7/11.7
# CBC decrypt   1.77/4.11(**)   6.42/7.47
#
#   (*)  numbers after slash are for misaligned data;
#   (**) this is result for largest block size, unlike all other
#        cases smaller blocks results are better[?];
  # Script preamble: locate the shared perlasm helper, redirect STDOUT to
  # the requested output file, and start the assembly text in $code.
  #
  # $dir, $output and $code are deliberately left as package globals, as in
  # the rest of this script.

  # Directory part of the path this script was started with (including the
  # trailing separator), or the empty string when $0 has no directory part.
  # Testing the match result avoids picking up a stale $1.
  $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

  # sparcv9_modes.pl is looked up next to this script and in ../../perlasm
  # relative to it.
  push(@INC,"${dir}","${dir}../../perlasm");
  require "sparcv9_modes.pl";

  # The last command-line argument names the file to generate.  Use the
  # three-argument form of open so the name is never interpreted as a mode
  # or a pipe, and fail loudly instead of silently generating nothing.
  # Without an argument STDOUT is left untouched.
  $output = pop;
  if (defined $output) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }

  # Common file header of the generated assembly.
  $code.=<<___;
#include "sparc_arch.h"
#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
#endif
.text
___
  # Generator for des_t4_key_expand.
  #
  # The emitted routine takes the input key pointer in %o0 and the output
  # pointer in %o1.  It tolerates a misaligned input pointer: the low three
  # address bits are tested, and when they are non-zero a second doubleword
  # is loaded and the two are merged with faligndata.  It then issues a chain
  # of des_kexpand instructions and stores sixteen 8-byte results at
  # out+0x00 .. out+0x78.  The stores are interleaved with the expansion
  # steps, so they are not issued in address order.
  #
  # The instruction text is appended to the global $code; nothing is printed
  # here.
  {
  my ($inp,$out) = ("%o0","%o1");

  $code.=<<___;
.align 32
.globl des_t4_key_expand
.type des_t4_key_expand,#function
des_t4_key_expand:
	andcc $inp, 0x7, %g0
	alignaddr $inp, %g0, $inp
	bz,pt %icc, 1f
	ldd [$inp + 0x00], %f0
	ldd [$inp + 0x08], %f2
	faligndata %f0, %f2, %f0
1:	des_kexpand %f0, 0, %f0
	des_kexpand %f0, 1, %f2
	std %f0, [$out + 0x00]
	des_kexpand %f2, 3, %f6
	std %f2, [$out + 0x08]
	des_kexpand %f2, 2, %f4
	des_kexpand %f6, 3, %f10
	std %f6, [$out + 0x18]
	des_kexpand %f6, 2, %f8
	std %f4, [$out + 0x10]
	des_kexpand %f10, 3, %f14
	std %f10, [$out + 0x28]
	des_kexpand %f10, 2, %f12
	std %f8, [$out + 0x20]
	des_kexpand %f14, 1, %f16
	std %f14, [$out + 0x38]
	des_kexpand %f16, 3, %f20
	std %f12, [$out + 0x30]
	des_kexpand %f16, 2, %f18
	std %f16, [$out + 0x40]
	des_kexpand %f20, 3, %f24
	std %f20, [$out + 0x50]
	des_kexpand %f20, 2, %f22
	std %f18, [$out + 0x48]
	des_kexpand %f24, 3, %f28
	std %f24, [$out + 0x60]
	des_kexpand %f24, 2, %f26
	std %f22, [$out + 0x58]
	des_kexpand %f28, 1, %f30
	std %f28, [$out + 0x70]
	std %f26, [$out + 0x68]
	retl
	std %f30, [$out + 0x78]
.size des_t4_key_expand,.-des_t4_key_expand
___
  }
  # Generators for the four CBC entry points:
  #   des_t4_cbc_encrypt,      des_t4_cbc_decrypt,
  #   des_t4_ede3_cbc_encrypt, des_t4_ede3_cbc_decrypt.
  #
  # All four take (inp, out, len, key, ivec) in %o0..%o4 and use %g1..%g3 as
  # scratch for the input bit offsets and the output byte mask (%g4/%g5 are
  # used as temporaries as well).  They share the same skeleton:
  #   * return immediately through .Lcbc_abort when len is zero;
  #   * turn len into a count of 8-byte blocks (len >> 3);
  #   * read the 8-byte ivec with two 32-bit loads and write it back with
  #     two 32-bit stores on exit;
  #   * read input with aligned 64-bit loads, merging two doublewords by
  #     shifting when inp is not 8-byte aligned;
  #   * write output with a plain std when out is 8-byte aligned, otherwise
  #     via faligndata and two partial stores at local label 2.
  #
  # NOTE(review): len is only compared against zero before being shifted.
  # A len of 1..7 therefore reaches the loop with a block count of zero,
  # which the loop decrements before testing - callers presumably always
  # pass a multiple of 8; confirm.
  #
  # .Lcbc_abort is defined once, inside des_t4_cbc_encrypt, and is branched
  # to by the other three routines, so the first heredoc must always be
  # emitted.
  {
  my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

  # des_t4_cbc_encrypt: the sixteen 8-byte key-schedule entries at
  # key+0x00..0x78 are loaded once into %f4..%f34.  For every block the
  # input is xored into %f0 (which holds the ivec / previous output), then
  # run through des_ip, eight des_round and des_iip; %f0 is both stored and
  # carried into the next block.
  $code.=<<___;
.globl des_t4_cbc_encrypt
.align 32
des_t4_cbc_encrypt:
	cmp $len, 0
	be,pn $::size_t_cc, .Lcbc_abort
	srln $len, 0, $len ! needed on v8+, "nop" on v9
	ld [$ivec + 0], %f0 ! load ivec
	ld [$ivec + 4], %f1
	and $inp, 7, $ileft
	andn $inp, 7, $inp
	sll $ileft, 3, $ileft
	mov 0xff, $omask
	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	sub %g0, $ileft, $iright
	and $out, 7, %g4
	alignaddrl $out, %g0, $out
	srl $omask, %g4, $omask
	srlx $len, 3, $len
	movrz %g4, 0, $omask
	prefetch [$out], 22
	ldd [$key + 0x00], %f4 ! load key schedule
	ldd [$key + 0x08], %f6
	ldd [$key + 0x10], %f8
	ldd [$key + 0x18], %f10
	ldd [$key + 0x20], %f12
	ldd [$key + 0x28], %f14
	ldd [$key + 0x30], %f16
	ldd [$key + 0x38], %f18
	ldd [$key + 0x40], %f20
	ldd [$key + 0x48], %f22
	ldd [$key + 0x50], %f24
	ldd [$key + 0x58], %f26
	ldd [$key + 0x60], %f28
	ldd [$key + 0x68], %f30
	ldd [$key + 0x70], %f32
	ldd [$key + 0x78], %f34
.Ldes_cbc_enc_loop:
	ldx [$inp + 0], %g4
	brz,pt $ileft, 4f
	nop
	ldx [$inp + 8], %g5
	sllx %g4, $ileft, %g4
	srlx %g5, $iright, %g5
	or %g5, %g4, %g4
4:
	movxtod %g4, %f2
	prefetch [$inp + 8+63], 20
	add $inp, 8, $inp
	fxor %f2, %f0, %f0 ! ^= ivec
	prefetch [$out + 63], 22
	des_ip %f0, %f0
	des_round %f4, %f6, %f0, %f0
	des_round %f8, %f10, %f0, %f0
	des_round %f12, %f14, %f0, %f0
	des_round %f16, %f18, %f0, %f0
	des_round %f20, %f22, %f0, %f0
	des_round %f24, %f26, %f0, %f0
	des_round %f28, %f30, %f0, %f0
	des_round %f32, %f34, %f0, %f0
	des_iip %f0, %f0
	brnz,pn $omask, 2f
	sub $len, 1, $len
	std %f0, [$out + 0]
	brnz,pt $len, .Ldes_cbc_enc_loop
	add $out, 8, $out
	st %f0, [$ivec + 0] ! write out ivec
	retl
	st %f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop
.align 16
2:	ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
	! and ~4x deterioration
	! in inp==out case
	faligndata %f0, %f0, %f2 ! handle unaligned output
	stda %f2, [$out + $omask]0xc0 ! partial store
	add $out, 8, $out
	orn %g0, $omask, $omask
	stda %f2, [$out + $omask]0xc0 ! partial store
	brnz,pt $len, .Ldes_cbc_enc_loop+4
	orn %g0, $omask, $omask
	st %f0, [$ivec + 0] ! write out ivec
	retl
	st %f1, [$ivec + 4]
.type des_t4_cbc_encrypt,#function
.size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
___

  # des_t4_cbc_decrypt: same skeleton, but the key schedule is loaded in
  # the opposite order (key+0x78 down to key+0x00).  %f2 holds the ivec /
  # previous input block; it is xored in after des_iip, and the current
  # input block (still in %g4) then replaces it for the next iteration.
  $code.=<<___;
.globl des_t4_cbc_decrypt
.align 32
des_t4_cbc_decrypt:
	cmp $len, 0
	be,pn $::size_t_cc, .Lcbc_abort
	srln $len, 0, $len ! needed on v8+, "nop" on v9
	ld [$ivec + 0], %f2 ! load ivec
	ld [$ivec + 4], %f3
	and $inp, 7, $ileft
	andn $inp, 7, $inp
	sll $ileft, 3, $ileft
	mov 0xff, $omask
	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	sub %g0, $ileft, $iright
	and $out, 7, %g4
	alignaddrl $out, %g0, $out
	srl $omask, %g4, $omask
	srlx $len, 3, $len
	movrz %g4, 0, $omask
	prefetch [$out], 22
	ldd [$key + 0x78], %f4 ! load key schedule
	ldd [$key + 0x70], %f6
	ldd [$key + 0x68], %f8
	ldd [$key + 0x60], %f10
	ldd [$key + 0x58], %f12
	ldd [$key + 0x50], %f14
	ldd [$key + 0x48], %f16
	ldd [$key + 0x40], %f18
	ldd [$key + 0x38], %f20
	ldd [$key + 0x30], %f22
	ldd [$key + 0x28], %f24
	ldd [$key + 0x20], %f26
	ldd [$key + 0x18], %f28
	ldd [$key + 0x10], %f30
	ldd [$key + 0x08], %f32
	ldd [$key + 0x00], %f34
.Ldes_cbc_dec_loop:
	ldx [$inp + 0], %g4
	brz,pt $ileft, 4f
	nop
	ldx [$inp + 8], %g5
	sllx %g4, $ileft, %g4
	srlx %g5, $iright, %g5
	or %g5, %g4, %g4
4:
	movxtod %g4, %f0
	prefetch [$inp + 8+63], 20
	add $inp, 8, $inp
	prefetch [$out + 63], 22
	des_ip %f0, %f0
	des_round %f4, %f6, %f0, %f0
	des_round %f8, %f10, %f0, %f0
	des_round %f12, %f14, %f0, %f0
	des_round %f16, %f18, %f0, %f0
	des_round %f20, %f22, %f0, %f0
	des_round %f24, %f26, %f0, %f0
	des_round %f28, %f30, %f0, %f0
	des_round %f32, %f34, %f0, %f0
	des_iip %f0, %f0
	fxor %f2, %f0, %f0 ! ^= ivec
	movxtod %g4, %f2
	brnz,pn $omask, 2f
	sub $len, 1, $len
	std %f0, [$out + 0]
	brnz,pt $len, .Ldes_cbc_dec_loop
	add $out, 8, $out
	st %f2, [$ivec + 0] ! write out ivec
	retl
	st %f3, [$ivec + 4]
.align 16
2:	ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
	! and ~4x deterioration
	! in inp==out case
	faligndata %f0, %f0, %f0 ! handle unaligned output
	stda %f0, [$out + $omask]0xc0 ! partial store
	add $out, 8, $out
	orn %g0, $omask, $omask
	stda %f0, [$out + $omask]0xc0 ! partial store
	brnz,pt $len, .Ldes_cbc_dec_loop+4
	orn %g0, $omask, $omask
	st %f2, [$ivec + 0] ! write out ivec
	retl
	st %f3, [$ivec + 4]
.type des_t4_cbc_decrypt,#function
.size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

  # One might wonder why does one have back-to-back des_iip/des_ip
  # pairs between EDE passes. Indeed, aren't they inverse of each other?
  # They almost are. Outcome of the pair is 32-bit words being swapped
  # in target register. Consider pair of des_iip/des_ip as a way to
  # perform the due swap, it's actually fastest way in this case.

  # des_t4_ede3_cbc_encrypt: three passes of eight des_round per block.
  # The key schedule is read as three 0x80-byte parts.  Part one
  # (key+0x00..0x78) is loaded once into %f4..%f34 and stays resident.  The
  # other two parts are reloaded into %f36..%f62 for every block, with the
  # loads interleaved between rounds: pass two reads key+0xf8 down to
  # key+0x80, pass three reads key+0x100 up to key+0x178.
  $code.=<<___;
.globl des_t4_ede3_cbc_encrypt
.align 32
des_t4_ede3_cbc_encrypt:
	cmp $len, 0
	be,pn $::size_t_cc, .Lcbc_abort
	srln $len, 0, $len ! needed on v8+, "nop" on v9
	ld [$ivec + 0], %f0 ! load ivec
	ld [$ivec + 4], %f1
	and $inp, 7, $ileft
	andn $inp, 7, $inp
	sll $ileft, 3, $ileft
	mov 0xff, $omask
	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	sub %g0, $ileft, $iright
	and $out, 7, %g4
	alignaddrl $out, %g0, $out
	srl $omask, %g4, $omask
	srlx $len, 3, $len
	movrz %g4, 0, $omask
	prefetch [$out], 22
	ldd [$key + 0x00], %f4 ! load key schedule
	ldd [$key + 0x08], %f6
	ldd [$key + 0x10], %f8
	ldd [$key + 0x18], %f10
	ldd [$key + 0x20], %f12
	ldd [$key + 0x28], %f14
	ldd [$key + 0x30], %f16
	ldd [$key + 0x38], %f18
	ldd [$key + 0x40], %f20
	ldd [$key + 0x48], %f22
	ldd [$key + 0x50], %f24
	ldd [$key + 0x58], %f26
	ldd [$key + 0x60], %f28
	ldd [$key + 0x68], %f30
	ldd [$key + 0x70], %f32
	ldd [$key + 0x78], %f34
.Ldes_ede3_cbc_enc_loop:
	ldx [$inp + 0], %g4
	brz,pt $ileft, 4f
	nop
	ldx [$inp + 8], %g5
	sllx %g4, $ileft, %g4
	srlx %g5, $iright, %g5
	or %g5, %g4, %g4
4:
	movxtod %g4, %f2
	prefetch [$inp + 8+63], 20
	add $inp, 8, $inp
	fxor %f2, %f0, %f0 ! ^= ivec
	prefetch [$out + 63], 22
	des_ip %f0, %f0
	des_round %f4, %f6, %f0, %f0
	des_round %f8, %f10, %f0, %f0
	des_round %f12, %f14, %f0, %f0
	des_round %f16, %f18, %f0, %f0
	ldd [$key + 0x100-0x08], %f36
	ldd [$key + 0x100-0x10], %f38
	des_round %f20, %f22, %f0, %f0
	ldd [$key + 0x100-0x18], %f40
	ldd [$key + 0x100-0x20], %f42
	des_round %f24, %f26, %f0, %f0
	ldd [$key + 0x100-0x28], %f44
	ldd [$key + 0x100-0x30], %f46
	des_round %f28, %f30, %f0, %f0
	ldd [$key + 0x100-0x38], %f48
	ldd [$key + 0x100-0x40], %f50
	des_round %f32, %f34, %f0, %f0
	ldd [$key + 0x100-0x48], %f52
	ldd [$key + 0x100-0x50], %f54
	des_iip %f0, %f0
	ldd [$key + 0x100-0x58], %f56
	ldd [$key + 0x100-0x60], %f58
	des_ip %f0, %f0
	ldd [$key + 0x100-0x68], %f60
	ldd [$key + 0x100-0x70], %f62
	des_round %f36, %f38, %f0, %f0
	ldd [$key + 0x100-0x78], %f36
	ldd [$key + 0x100-0x80], %f38
	des_round %f40, %f42, %f0, %f0
	des_round %f44, %f46, %f0, %f0
	des_round %f48, %f50, %f0, %f0
	ldd [$key + 0x100+0x00], %f40
	ldd [$key + 0x100+0x08], %f42
	des_round %f52, %f54, %f0, %f0
	ldd [$key + 0x100+0x10], %f44
	ldd [$key + 0x100+0x18], %f46
	des_round %f56, %f58, %f0, %f0
	ldd [$key + 0x100+0x20], %f48
	ldd [$key + 0x100+0x28], %f50
	des_round %f60, %f62, %f0, %f0
	ldd [$key + 0x100+0x30], %f52
	ldd [$key + 0x100+0x38], %f54
	des_round %f36, %f38, %f0, %f0
	ldd [$key + 0x100+0x40], %f56
	ldd [$key + 0x100+0x48], %f58
	des_iip %f0, %f0
	ldd [$key + 0x100+0x50], %f60
	ldd [$key + 0x100+0x58], %f62
	des_ip %f0, %f0
	ldd [$key + 0x100+0x60], %f36
	ldd [$key + 0x100+0x68], %f38
	des_round %f40, %f42, %f0, %f0
	ldd [$key + 0x100+0x70], %f40
	ldd [$key + 0x100+0x78], %f42
	des_round %f44, %f46, %f0, %f0
	des_round %f48, %f50, %f0, %f0
	des_round %f52, %f54, %f0, %f0
	des_round %f56, %f58, %f0, %f0
	des_round %f60, %f62, %f0, %f0
	des_round %f36, %f38, %f0, %f0
	des_round %f40, %f42, %f0, %f0
	des_iip %f0, %f0
	brnz,pn $omask, 2f
	sub $len, 1, $len
	std %f0, [$out + 0]
	brnz,pt $len, .Ldes_ede3_cbc_enc_loop
	add $out, 8, $out
	st %f0, [$ivec + 0] ! write out ivec
	retl
	st %f1, [$ivec + 4]
.align 16
2:	ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
	! and ~2x deterioration
	! in inp==out case
	faligndata %f0, %f0, %f2 ! handle unaligned output
	stda %f2, [$out + $omask]0xc0 ! partial store
	add $out, 8, $out
	orn %g0, $omask, $omask
	stda %f2, [$out + $omask]0xc0 ! partial store
	brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
	orn %g0, $omask, $omask
	st %f0, [$ivec + 0] ! write out ivec
	retl
	st %f1, [$ivec + 4]
.type des_t4_ede3_cbc_encrypt,#function
.size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
___

  # des_t4_ede3_cbc_decrypt: the same three parts of the key schedule are
  # walked in the opposite order and direction.  key+0x178 down to
  # key+0x100 is loaded once into %f4..%f34; for every block pass two
  # reloads key+0x80 up to key+0xf8 and pass three reloads key+0x78 down to
  # key+0x00 into %f36..%f62.  Chaining works as in des_t4_cbc_decrypt:
  # %f2 is xored in after the last des_iip and then replaced by the current
  # input block.
  $code.=<<___;
.globl des_t4_ede3_cbc_decrypt
.align 32
des_t4_ede3_cbc_decrypt:
	cmp $len, 0
	be,pn $::size_t_cc, .Lcbc_abort
	srln $len, 0, $len ! needed on v8+, "nop" on v9
	ld [$ivec + 0], %f2 ! load ivec
	ld [$ivec + 4], %f3
	and $inp, 7, $ileft
	andn $inp, 7, $inp
	sll $ileft, 3, $ileft
	mov 0xff, $omask
	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	sub %g0, $ileft, $iright
	and $out, 7, %g4
	alignaddrl $out, %g0, $out
	srl $omask, %g4, $omask
	srlx $len, 3, $len
	movrz %g4, 0, $omask
	prefetch [$out], 22
	ldd [$key + 0x100+0x78], %f4 ! load key schedule
	ldd [$key + 0x100+0x70], %f6
	ldd [$key + 0x100+0x68], %f8
	ldd [$key + 0x100+0x60], %f10
	ldd [$key + 0x100+0x58], %f12
	ldd [$key + 0x100+0x50], %f14
	ldd [$key + 0x100+0x48], %f16
	ldd [$key + 0x100+0x40], %f18
	ldd [$key + 0x100+0x38], %f20
	ldd [$key + 0x100+0x30], %f22
	ldd [$key + 0x100+0x28], %f24
	ldd [$key + 0x100+0x20], %f26
	ldd [$key + 0x100+0x18], %f28
	ldd [$key + 0x100+0x10], %f30
	ldd [$key + 0x100+0x08], %f32
	ldd [$key + 0x100+0x00], %f34
.Ldes_ede3_cbc_dec_loop:
	ldx [$inp + 0], %g4
	brz,pt $ileft, 4f
	nop
	ldx [$inp + 8], %g5
	sllx %g4, $ileft, %g4
	srlx %g5, $iright, %g5
	or %g5, %g4, %g4
4:
	movxtod %g4, %f0
	prefetch [$inp + 8+63], 20
	add $inp, 8, $inp
	prefetch [$out + 63], 22
	des_ip %f0, %f0
	des_round %f4, %f6, %f0, %f0
	des_round %f8, %f10, %f0, %f0
	des_round %f12, %f14, %f0, %f0
	des_round %f16, %f18, %f0, %f0
	ldd [$key + 0x80+0x00], %f36
	ldd [$key + 0x80+0x08], %f38
	des_round %f20, %f22, %f0, %f0
	ldd [$key + 0x80+0x10], %f40
	ldd [$key + 0x80+0x18], %f42
	des_round %f24, %f26, %f0, %f0
	ldd [$key + 0x80+0x20], %f44
	ldd [$key + 0x80+0x28], %f46
	des_round %f28, %f30, %f0, %f0
	ldd [$key + 0x80+0x30], %f48
	ldd [$key + 0x80+0x38], %f50
	des_round %f32, %f34, %f0, %f0
	ldd [$key + 0x80+0x40], %f52
	ldd [$key + 0x80+0x48], %f54
	des_iip %f0, %f0
	ldd [$key + 0x80+0x50], %f56
	ldd [$key + 0x80+0x58], %f58
	des_ip %f0, %f0
	ldd [$key + 0x80+0x60], %f60
	ldd [$key + 0x80+0x68], %f62
	des_round %f36, %f38, %f0, %f0
	ldd [$key + 0x80+0x70], %f36
	ldd [$key + 0x80+0x78], %f38
	des_round %f40, %f42, %f0, %f0
	des_round %f44, %f46, %f0, %f0
	des_round %f48, %f50, %f0, %f0
	ldd [$key + 0x80-0x08], %f40
	ldd [$key + 0x80-0x10], %f42
	des_round %f52, %f54, %f0, %f0
	ldd [$key + 0x80-0x18], %f44
	ldd [$key + 0x80-0x20], %f46
	des_round %f56, %f58, %f0, %f0
	ldd [$key + 0x80-0x28], %f48
	ldd [$key + 0x80-0x30], %f50
	des_round %f60, %f62, %f0, %f0
	ldd [$key + 0x80-0x38], %f52
	ldd [$key + 0x80-0x40], %f54
	des_round %f36, %f38, %f0, %f0
	ldd [$key + 0x80-0x48], %f56
	ldd [$key + 0x80-0x50], %f58
	des_iip %f0, %f0
	ldd [$key + 0x80-0x58], %f60
	ldd [$key + 0x80-0x60], %f62
	des_ip %f0, %f0
	ldd [$key + 0x80-0x68], %f36
	ldd [$key + 0x80-0x70], %f38
	des_round %f40, %f42, %f0, %f0
	ldd [$key + 0x80-0x78], %f40
	ldd [$key + 0x80-0x80], %f42
	des_round %f44, %f46, %f0, %f0
	des_round %f48, %f50, %f0, %f0
	des_round %f52, %f54, %f0, %f0
	des_round %f56, %f58, %f0, %f0
	des_round %f60, %f62, %f0, %f0
	des_round %f36, %f38, %f0, %f0
	des_round %f40, %f42, %f0, %f0
	des_iip %f0, %f0
	fxor %f2, %f0, %f0 ! ^= ivec
	movxtod %g4, %f2
	brnz,pn $omask, 2f
	sub $len, 1, $len
	std %f0, [$out + 0]
	brnz,pt $len, .Ldes_ede3_cbc_dec_loop
	add $out, 8, $out
	st %f2, [$ivec + 0] ! write out ivec
	retl
	st %f3, [$ivec + 4]
.align 16
2:	ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
	! and ~3x deterioration
	! in inp==out case
	faligndata %f0, %f0, %f0 ! handle unaligned output
	stda %f0, [$out + $omask]0xc0 ! partial store
	add $out, 8, $out
	orn %g0, $omask, $omask
	stda %f0, [$out + $omask]0xc0 ! partial store
	brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
	orn %g0, $omask, $omask
	st %f2, [$ivec + 0] ! write out ivec
	retl
	st %f3, [$ivec + 4]
.type des_t4_ede3_cbc_decrypt,#function
.size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
  }
  # Trailer: append the identification string, hand the accumulated text
  # to the emitter and flush the output file.
  $code.=<<___;
.asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align 4
___

  # emit_assembler is not defined in this file - presumably it comes from
  # the required sparcv9_modes.pl and is what actually prints $code, since
  # nothing here does; confirm against that helper.
  emit_assembler();

  # Buffered write errors (disk full, I/O error) only surface when the
  # handle is closed, so a failed close must abort the build step.
  close STDOUT or die "error closing STDOUT: $!";