123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617 |
- #!/usr/bin/env perl
- # ====================================================================
- # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
- # <appro@openssl.org>. The module is licensed under 2-clause BSD
- # license. March 2013. All rights reserved.
- # ====================================================================
- ######################################################################
- # DES for SPARC T4.
- #
- # As with other hardware-assisted ciphers, CBC encrypt results [for
- # aligned data] are virtually identical to critical path lengths:
- #
- #               DES             Triple-DES
- # CBC encrypt   4.14/4.15(*)    11.7/11.7
- # CBC decrypt   1.77/4.11(**)   6.42/7.47
- #
- # (*)  numbers after the slash are for misaligned data;
- # (**) this is the result for the largest block size; unlike in all
- #      other cases, results for smaller blocks are better[?];
- # Take the directory part of the script path ($0) and make both that
- # directory and the perlasm directory two levels above it searchable,
- # so that the require below can locate sparcv9_modes.pl.
- $0 =~ m/(.*[\/\\])[^\/\\]+$/;
- $dir = $1;
- push(@INC, "${dir}");
- push(@INC, "${dir}../../perlasm");
- require "sparcv9_modes.pl";
- # asm_init receives the command-line flags; later code in this file
- # reads $::abibits and $::size_t_cc.
- &asm_init(@ARGV);
- # The .register declarations for %g2/%g3 are emitted for 64-bit builds only.
- if ($::abibits==64) {
- $code.=<<___;
- .register %g2,#scratch
- .register %g3,#scratch
- ___
- }
- # Everything generated after this point goes into the text section.
- $code.=<<___;
- .text
- ___
- { my ($inp,$out)=("%o0","%o1"); # des_t4_key_expand: $inp (%o0) points at the 8 key bytes to expand, $out (%o1) at the buffer that receives sixteen 8-byte des_kexpand results (offsets 0x00..0x78)
- $code.=<<___; # append the routine to $code; its prologue aligns $inp down and uses faligndata when the key pointer is not 8-byte aligned, and the last std sits in the delay slot of retl
- .align 32
- .globl des_t4_key_expand
- .type des_t4_key_expand,#function
- des_t4_key_expand:
- andcc $inp, 0x7, %g0
- alignaddr $inp, %g0, $inp
- bz,pt %icc, 1f
- ldd [$inp + 0x00], %f0
- ldd [$inp + 0x08], %f2
- faligndata %f0, %f2, %f0
- 1: des_kexpand %f0, 0, %f0
- des_kexpand %f0, 1, %f2
- std %f0, [$out + 0x00]
- des_kexpand %f2, 3, %f6
- std %f2, [$out + 0x08]
- des_kexpand %f2, 2, %f4
- des_kexpand %f6, 3, %f10
- std %f6, [$out + 0x18]
- des_kexpand %f6, 2, %f8
- std %f4, [$out + 0x10]
- des_kexpand %f10, 3, %f14
- std %f10, [$out + 0x28]
- des_kexpand %f10, 2, %f12
- std %f8, [$out + 0x20]
- des_kexpand %f14, 1, %f16
- std %f14, [$out + 0x38]
- des_kexpand %f16, 3, %f20
- std %f12, [$out + 0x30]
- des_kexpand %f16, 2, %f18
- std %f16, [$out + 0x40]
- des_kexpand %f20, 3, %f24
- std %f20, [$out + 0x50]
- des_kexpand %f20, 2, %f22
- std %f18, [$out + 0x48]
- des_kexpand %f24, 3, %f28
- std %f24, [$out + 0x60]
- des_kexpand %f24, 2, %f26
- std %f22, [$out + 0x58]
- des_kexpand %f28, 1, %f30
- std %f28, [$out + 0x70]
- std %f26, [$out + 0x68]
- retl
- std %f30, [$out + 0x78]
- .size des_t4_key_expand,.-des_t4_key_expand
- ___
- } # end of the lexical scope holding des_t4_key_expand's register names
- { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
- my ($ileft,$iright,$omask) = map("%g$_",(1..3));
- # Register assignment shared by the four CBC routines generated below:
- #   $inp   (%o0)  input pointer, may be misaligned
- #   $out   (%o1)  output pointer, may be misaligned
- #   $len   (%o2)  length in bytes; len>>3 whole 8-byte blocks are processed
- #   $key   (%o3)  key schedule (0x80 bytes per DES key; the ede3 routines
- #                 read three consecutive schedules, 0x000..0x17f)
- #   $ivec  (%o4)  8-byte IV, loaded on entry and written back on exit
- #   $ileft/$iright (%g1/%g2)  bit shifts used to assemble a misaligned
- #                 input block out of two aligned 64-bit loads
- #   $omask (%g3)  byte mask for the partial stores used when the output
- #                 is misaligned; zero when the output is 8-byte aligned
- # Every routine returns at once through .Lcbc_abort when len is zero.
- #
- # The entry check compares len under $::size_t_cc, i.e. only the low 32
- # bits in a 32-bit (v8+) build, but the srlx and brnz instructions that
- # follow operate on the whole 64-bit register. The upper half of a 32-bit
- # size_t argument is not guaranteed to be zero there, so it has to be
- # cleared before len is used as a block counter; otherwise the loop can
- # run past the end of the buffers. The instruction below goes into the
- # delay slot of the entry branch: a zero-extending srl for 32-bit builds,
- # a plain nop for 64-bit builds where len is a full 64-bit value.
- my $zext_len = ($::abibits==64) ? "nop" : "srl $len, 0, $len";
- $code.=<<___;
- .globl des_t4_cbc_encrypt
- .align 32
- des_t4_cbc_encrypt:
- cmp $len, 0
- be,pn $::size_t_cc, .Lcbc_abort
- $zext_len ! needed on v8+, "nop" on v9
- ld [$ivec + 0], %f0 ! load ivec
- ld [$ivec + 4], %f1
- and $inp, 7, $ileft
- andn $inp, 7, $inp
- sll $ileft, 3, $ileft
- mov 0xff, $omask
- prefetch [$inp], 20
- prefetch [$inp + 63], 20
- sub %g0, $ileft, $iright
- and $out, 7, %g4
- alignaddrl $out, %g0, $out
- srl $omask, %g4, $omask
- srlx $len, 3, $len
- movrz %g4, 0, $omask
- prefetch [$out], 22
- ldd [$key + 0x00], %f4 ! load key schedule
- ldd [$key + 0x08], %f6
- ldd [$key + 0x10], %f8
- ldd [$key + 0x18], %f10
- ldd [$key + 0x20], %f12
- ldd [$key + 0x28], %f14
- ldd [$key + 0x30], %f16
- ldd [$key + 0x38], %f18
- ldd [$key + 0x40], %f20
- ldd [$key + 0x48], %f22
- ldd [$key + 0x50], %f24
- ldd [$key + 0x58], %f26
- ldd [$key + 0x60], %f28
- ldd [$key + 0x68], %f30
- ldd [$key + 0x70], %f32
- ldd [$key + 0x78], %f34
- .Ldes_cbc_enc_loop:
- ldx [$inp + 0], %g4
- brz,pt $ileft, 4f
- nop
- ldx [$inp + 8], %g5
- sllx %g4, $ileft, %g4
- srlx %g5, $iright, %g5
- or %g5, %g4, %g4
- 4:
- movxtod %g4, %f2
- prefetch [$inp + 8+63], 20
- add $inp, 8, $inp
- fxor %f2, %f0, %f0 ! ^= ivec
- prefetch [$out + 63], 22
- des_ip %f0, %f0
- des_round %f4, %f6, %f0, %f0
- des_round %f8, %f10, %f0, %f0
- des_round %f12, %f14, %f0, %f0
- des_round %f16, %f18, %f0, %f0
- des_round %f20, %f22, %f0, %f0
- des_round %f24, %f26, %f0, %f0
- des_round %f28, %f30, %f0, %f0
- des_round %f32, %f34, %f0, %f0
- des_iip %f0, %f0
- brnz,pn $omask, 2f
- sub $len, 1, $len
- std %f0, [$out + 0]
- brnz,pt $len, .Ldes_cbc_enc_loop
- add $out, 8, $out
- st %f0, [$ivec + 0] ! write out ivec
- retl
- st %f1, [$ivec + 4]
- .Lcbc_abort:
- retl
- nop
- .align 16
- 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
- ! and ~4x deterioration
- ! in inp==out case
- faligndata %f0, %f0, %f2 ! handle unaligned output
- stda %f2, [$out + $omask]0xc0 ! partial store
- add $out, 8, $out
- orn %g0, $omask, $omask
- stda %f2, [$out + $omask]0xc0 ! partial store
- brnz,pt $len, .Ldes_cbc_enc_loop+4
- orn %g0, $omask, $omask
- st %f0, [$ivec + 0] ! write out ivec
- retl
- st %f1, [$ivec + 4]
- .type des_t4_cbc_encrypt,#function
- .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
- .globl des_t4_cbc_decrypt
- .align 32
- des_t4_cbc_decrypt:
- cmp $len, 0
- be,pn $::size_t_cc, .Lcbc_abort
- $zext_len ! needed on v8+, "nop" on v9
- ld [$ivec + 0], %f2 ! load ivec
- ld [$ivec + 4], %f3
- and $inp, 7, $ileft
- andn $inp, 7, $inp
- sll $ileft, 3, $ileft
- mov 0xff, $omask
- prefetch [$inp], 20
- prefetch [$inp + 63], 20
- sub %g0, $ileft, $iright
- and $out, 7, %g4
- alignaddrl $out, %g0, $out
- srl $omask, %g4, $omask
- srlx $len, 3, $len
- movrz %g4, 0, $omask
- prefetch [$out], 22
- ldd [$key + 0x78], %f4 ! load key schedule
- ldd [$key + 0x70], %f6
- ldd [$key + 0x68], %f8
- ldd [$key + 0x60], %f10
- ldd [$key + 0x58], %f12
- ldd [$key + 0x50], %f14
- ldd [$key + 0x48], %f16
- ldd [$key + 0x40], %f18
- ldd [$key + 0x38], %f20
- ldd [$key + 0x30], %f22
- ldd [$key + 0x28], %f24
- ldd [$key + 0x20], %f26
- ldd [$key + 0x18], %f28
- ldd [$key + 0x10], %f30
- ldd [$key + 0x08], %f32
- ldd [$key + 0x00], %f34
- .Ldes_cbc_dec_loop:
- ldx [$inp + 0], %g4
- brz,pt $ileft, 4f
- nop
- ldx [$inp + 8], %g5
- sllx %g4, $ileft, %g4
- srlx %g5, $iright, %g5
- or %g5, %g4, %g4
- 4:
- movxtod %g4, %f0
- prefetch [$inp + 8+63], 20
- add $inp, 8, $inp
- prefetch [$out + 63], 22
- des_ip %f0, %f0
- des_round %f4, %f6, %f0, %f0
- des_round %f8, %f10, %f0, %f0
- des_round %f12, %f14, %f0, %f0
- des_round %f16, %f18, %f0, %f0
- des_round %f20, %f22, %f0, %f0
- des_round %f24, %f26, %f0, %f0
- des_round %f28, %f30, %f0, %f0
- des_round %f32, %f34, %f0, %f0
- des_iip %f0, %f0
- fxor %f2, %f0, %f0 ! ^= ivec
- movxtod %g4, %f2
- brnz,pn $omask, 2f
- sub $len, 1, $len
- std %f0, [$out + 0]
- brnz,pt $len, .Ldes_cbc_dec_loop
- add $out, 8, $out
- st %f2, [$ivec + 0] ! write out ivec
- retl
- st %f3, [$ivec + 4]
- .align 16
- 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
- ! and ~4x deterioration
- ! in inp==out case
- faligndata %f0, %f0, %f0 ! handle unaligned output
- stda %f0, [$out + $omask]0xc0 ! partial store
- add $out, 8, $out
- orn %g0, $omask, $omask
- stda %f0, [$out + $omask]0xc0 ! partial store
- brnz,pt $len, .Ldes_cbc_dec_loop+4
- orn %g0, $omask, $omask
- st %f2, [$ivec + 0] ! write out ivec
- retl
- st %f3, [$ivec + 4]
- .type des_t4_cbc_decrypt,#function
- .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
- ___
- # One might wonder why there are back-to-back des_iip/des_ip pairs
- # between the EDE passes. Aren't they inverses of each other? They
- # almost are: the net outcome of the pair is that the two 32-bit words
- # of the target register are swapped. Consider the des_iip/des_ip pair
- # as the way to perform the due swap; it is the fastest way in this case.
- $code.=<<___;
- .globl des_t4_ede3_cbc_encrypt
- .align 32
- des_t4_ede3_cbc_encrypt:
- cmp $len, 0
- be,pn $::size_t_cc, .Lcbc_abort
- $zext_len ! needed on v8+, "nop" on v9
- ld [$ivec + 0], %f0 ! load ivec
- ld [$ivec + 4], %f1
- and $inp, 7, $ileft
- andn $inp, 7, $inp
- sll $ileft, 3, $ileft
- mov 0xff, $omask
- prefetch [$inp], 20
- prefetch [$inp + 63], 20
- sub %g0, $ileft, $iright
- and $out, 7, %g4
- alignaddrl $out, %g0, $out
- srl $omask, %g4, $omask
- srlx $len, 3, $len
- movrz %g4, 0, $omask
- prefetch [$out], 22
- ldd [$key + 0x00], %f4 ! load key schedule
- ldd [$key + 0x08], %f6
- ldd [$key + 0x10], %f8
- ldd [$key + 0x18], %f10
- ldd [$key + 0x20], %f12
- ldd [$key + 0x28], %f14
- ldd [$key + 0x30], %f16
- ldd [$key + 0x38], %f18
- ldd [$key + 0x40], %f20
- ldd [$key + 0x48], %f22
- ldd [$key + 0x50], %f24
- ldd [$key + 0x58], %f26
- ldd [$key + 0x60], %f28
- ldd [$key + 0x68], %f30
- ldd [$key + 0x70], %f32
- ldd [$key + 0x78], %f34
- .Ldes_ede3_cbc_enc_loop:
- ldx [$inp + 0], %g4
- brz,pt $ileft, 4f
- nop
- ldx [$inp + 8], %g5
- sllx %g4, $ileft, %g4
- srlx %g5, $iright, %g5
- or %g5, %g4, %g4
- 4:
- movxtod %g4, %f2
- prefetch [$inp + 8+63], 20
- add $inp, 8, $inp
- fxor %f2, %f0, %f0 ! ^= ivec
- prefetch [$out + 63], 22
- des_ip %f0, %f0
- des_round %f4, %f6, %f0, %f0
- des_round %f8, %f10, %f0, %f0
- des_round %f12, %f14, %f0, %f0
- des_round %f16, %f18, %f0, %f0
- ldd [$key + 0x100-0x08], %f36
- ldd [$key + 0x100-0x10], %f38
- des_round %f20, %f22, %f0, %f0
- ldd [$key + 0x100-0x18], %f40
- ldd [$key + 0x100-0x20], %f42
- des_round %f24, %f26, %f0, %f0
- ldd [$key + 0x100-0x28], %f44
- ldd [$key + 0x100-0x30], %f46
- des_round %f28, %f30, %f0, %f0
- ldd [$key + 0x100-0x38], %f48
- ldd [$key + 0x100-0x40], %f50
- des_round %f32, %f34, %f0, %f0
- ldd [$key + 0x100-0x48], %f52
- ldd [$key + 0x100-0x50], %f54
- des_iip %f0, %f0
- ldd [$key + 0x100-0x58], %f56
- ldd [$key + 0x100-0x60], %f58
- des_ip %f0, %f0
- ldd [$key + 0x100-0x68], %f60
- ldd [$key + 0x100-0x70], %f62
- des_round %f36, %f38, %f0, %f0
- ldd [$key + 0x100-0x78], %f36
- ldd [$key + 0x100-0x80], %f38
- des_round %f40, %f42, %f0, %f0
- des_round %f44, %f46, %f0, %f0
- des_round %f48, %f50, %f0, %f0
- ldd [$key + 0x100+0x00], %f40
- ldd [$key + 0x100+0x08], %f42
- des_round %f52, %f54, %f0, %f0
- ldd [$key + 0x100+0x10], %f44
- ldd [$key + 0x100+0x18], %f46
- des_round %f56, %f58, %f0, %f0
- ldd [$key + 0x100+0x20], %f48
- ldd [$key + 0x100+0x28], %f50
- des_round %f60, %f62, %f0, %f0
- ldd [$key + 0x100+0x30], %f52
- ldd [$key + 0x100+0x38], %f54
- des_round %f36, %f38, %f0, %f0
- ldd [$key + 0x100+0x40], %f56
- ldd [$key + 0x100+0x48], %f58
- des_iip %f0, %f0
- ldd [$key + 0x100+0x50], %f60
- ldd [$key + 0x100+0x58], %f62
- des_ip %f0, %f0
- ldd [$key + 0x100+0x60], %f36
- ldd [$key + 0x100+0x68], %f38
- des_round %f40, %f42, %f0, %f0
- ldd [$key + 0x100+0x70], %f40
- ldd [$key + 0x100+0x78], %f42
- des_round %f44, %f46, %f0, %f0
- des_round %f48, %f50, %f0, %f0
- des_round %f52, %f54, %f0, %f0
- des_round %f56, %f58, %f0, %f0
- des_round %f60, %f62, %f0, %f0
- des_round %f36, %f38, %f0, %f0
- des_round %f40, %f42, %f0, %f0
- des_iip %f0, %f0
- brnz,pn $omask, 2f
- sub $len, 1, $len
- std %f0, [$out + 0]
- brnz,pt $len, .Ldes_ede3_cbc_enc_loop
- add $out, 8, $out
- st %f0, [$ivec + 0] ! write out ivec
- retl
- st %f1, [$ivec + 4]
- .align 16
- 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
- ! and ~2x deterioration
- ! in inp==out case
- faligndata %f0, %f0, %f2 ! handle unaligned output
- stda %f2, [$out + $omask]0xc0 ! partial store
- add $out, 8, $out
- orn %g0, $omask, $omask
- stda %f2, [$out + $omask]0xc0 ! partial store
- brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
- orn %g0, $omask, $omask
- st %f0, [$ivec + 0] ! write out ivec
- retl
- st %f1, [$ivec + 4]
- .type des_t4_ede3_cbc_encrypt,#function
- .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
- .globl des_t4_ede3_cbc_decrypt
- .align 32
- des_t4_ede3_cbc_decrypt:
- cmp $len, 0
- be,pn $::size_t_cc, .Lcbc_abort
- $zext_len ! needed on v8+, "nop" on v9
- ld [$ivec + 0], %f2 ! load ivec
- ld [$ivec + 4], %f3
- and $inp, 7, $ileft
- andn $inp, 7, $inp
- sll $ileft, 3, $ileft
- mov 0xff, $omask
- prefetch [$inp], 20
- prefetch [$inp + 63], 20
- sub %g0, $ileft, $iright
- and $out, 7, %g4
- alignaddrl $out, %g0, $out
- srl $omask, %g4, $omask
- srlx $len, 3, $len
- movrz %g4, 0, $omask
- prefetch [$out], 22
- ldd [$key + 0x100+0x78], %f4 ! load key schedule
- ldd [$key + 0x100+0x70], %f6
- ldd [$key + 0x100+0x68], %f8
- ldd [$key + 0x100+0x60], %f10
- ldd [$key + 0x100+0x58], %f12
- ldd [$key + 0x100+0x50], %f14
- ldd [$key + 0x100+0x48], %f16
- ldd [$key + 0x100+0x40], %f18
- ldd [$key + 0x100+0x38], %f20
- ldd [$key + 0x100+0x30], %f22
- ldd [$key + 0x100+0x28], %f24
- ldd [$key + 0x100+0x20], %f26
- ldd [$key + 0x100+0x18], %f28
- ldd [$key + 0x100+0x10], %f30
- ldd [$key + 0x100+0x08], %f32
- ldd [$key + 0x100+0x00], %f34
- .Ldes_ede3_cbc_dec_loop:
- ldx [$inp + 0], %g4
- brz,pt $ileft, 4f
- nop
- ldx [$inp + 8], %g5
- sllx %g4, $ileft, %g4
- srlx %g5, $iright, %g5
- or %g5, %g4, %g4
- 4:
- movxtod %g4, %f0
- prefetch [$inp + 8+63], 20
- add $inp, 8, $inp
- prefetch [$out + 63], 22
- des_ip %f0, %f0
- des_round %f4, %f6, %f0, %f0
- des_round %f8, %f10, %f0, %f0
- des_round %f12, %f14, %f0, %f0
- des_round %f16, %f18, %f0, %f0
- ldd [$key + 0x80+0x00], %f36
- ldd [$key + 0x80+0x08], %f38
- des_round %f20, %f22, %f0, %f0
- ldd [$key + 0x80+0x10], %f40
- ldd [$key + 0x80+0x18], %f42
- des_round %f24, %f26, %f0, %f0
- ldd [$key + 0x80+0x20], %f44
- ldd [$key + 0x80+0x28], %f46
- des_round %f28, %f30, %f0, %f0
- ldd [$key + 0x80+0x30], %f48
- ldd [$key + 0x80+0x38], %f50
- des_round %f32, %f34, %f0, %f0
- ldd [$key + 0x80+0x40], %f52
- ldd [$key + 0x80+0x48], %f54
- des_iip %f0, %f0
- ldd [$key + 0x80+0x50], %f56
- ldd [$key + 0x80+0x58], %f58
- des_ip %f0, %f0
- ldd [$key + 0x80+0x60], %f60
- ldd [$key + 0x80+0x68], %f62
- des_round %f36, %f38, %f0, %f0
- ldd [$key + 0x80+0x70], %f36
- ldd [$key + 0x80+0x78], %f38
- des_round %f40, %f42, %f0, %f0
- des_round %f44, %f46, %f0, %f0
- des_round %f48, %f50, %f0, %f0
- ldd [$key + 0x80-0x08], %f40
- ldd [$key + 0x80-0x10], %f42
- des_round %f52, %f54, %f0, %f0
- ldd [$key + 0x80-0x18], %f44
- ldd [$key + 0x80-0x20], %f46
- des_round %f56, %f58, %f0, %f0
- ldd [$key + 0x80-0x28], %f48
- ldd [$key + 0x80-0x30], %f50
- des_round %f60, %f62, %f0, %f0
- ldd [$key + 0x80-0x38], %f52
- ldd [$key + 0x80-0x40], %f54
- des_round %f36, %f38, %f0, %f0
- ldd [$key + 0x80-0x48], %f56
- ldd [$key + 0x80-0x50], %f58
- des_iip %f0, %f0
- ldd [$key + 0x80-0x58], %f60
- ldd [$key + 0x80-0x60], %f62
- des_ip %f0, %f0
- ldd [$key + 0x80-0x68], %f36
- ldd [$key + 0x80-0x70], %f38
- des_round %f40, %f42, %f0, %f0
- ldd [$key + 0x80-0x78], %f40
- ldd [$key + 0x80-0x80], %f42
- des_round %f44, %f46, %f0, %f0
- des_round %f48, %f50, %f0, %f0
- des_round %f52, %f54, %f0, %f0
- des_round %f56, %f58, %f0, %f0
- des_round %f60, %f62, %f0, %f0
- des_round %f36, %f38, %f0, %f0
- des_round %f40, %f42, %f0, %f0
- des_iip %f0, %f0
- fxor %f2, %f0, %f0 ! ^= ivec
- movxtod %g4, %f2
- brnz,pn $omask, 2f
- sub $len, 1, $len
- std %f0, [$out + 0]
- brnz,pt $len, .Ldes_ede3_cbc_dec_loop
- add $out, 8, $out
- st %f2, [$ivec + 0] ! write out ivec
- retl
- st %f3, [$ivec + 4]
- .align 16
- 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
- ! and ~3x deterioration
- ! in inp==out case
- faligndata %f0, %f0, %f0 ! handle unaligned output
- stda %f0, [$out + $omask]0xc0 ! partial store
- add $out, 8, $out
- orn %g0, $omask, $omask
- stda %f0, [$out + $omask]0xc0 ! partial store
- brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
- orn %g0, $omask, $omask
- st %f2, [$ivec + 0] ! write out ivec
- retl
- st %f3, [$ivec + 4]
- .type des_t4_ede3_cbc_decrypt,#function
- .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
- ___
- }
- # Append the identification string, then restore 4-byte alignment after it.
- $code.=<<___;
- .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
- .align 4
- ___
- # emit_assembler is not defined in this file; presumably it comes from
- # sparcv9_modes.pl and post-processes and prints the accumulated $code.
- &emit_assembler();
- # Buffered write errors (e.g. a full disk) only surface when the handle is
- # closed, so check the result instead of silently leaving a truncated
- # assembly file behind.
- close STDOUT or die "error closing STDOUT: $!";
|