123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- #! /usr/bin/env perl
- # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
- #
- # Licensed under the Apache License 2.0 (the "License"). You may not use
- # this file except in compliance with the License. You can obtain a copy
- # in the file LICENSE in the source distribution or at
- # https://www.openssl.org/source/license.html
- # ====================================================================
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
- # project. The module is, however, dual licensed under OpenSSL and
- # CRYPTOGAMS licenses depending on where you obtain it. For further
- # details see http://www.openssl.org/~appro/cryptogams/.
- # ====================================================================
- # SHA1 block procedure for Alpha.
- # On 21264 performance is 33% better than code generated by vendor
- # compiler, and 75% better than GCC [3.4], and in absolute terms is
- # 8.7 cycles per processed byte. Implementation features vectorized
- # byte swap, but not Xupdate.
- @X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
- "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
- $ctx="a0"; # $16
- $inp="a1";
- $num="a2";
- $A="a3";
- $B="a4"; # 20
- $C="a5";
- $D="t8";
- $E="t9"; @V=($A,$B,$C,$D,$E);
- $t0="t10"; # 24
- $t1="t11";
- $t2="ra";
- $t3="t12";
- $K="AT"; # 28
- sub BODY_00_19 {
- my ($i,$a,$b,$c,$d,$e)=@_;
- my $j=$i+1;
- $code.=<<___ if ($i==0);
- ldq_u @X[0],0+0($inp)
- ldq_u @X[1],0+7($inp)
- ___
- $code.=<<___ if (!($i&1) && $i<14);
- ldq_u @X[$i+2],($i+2)*4+0($inp)
- ldq_u @X[$i+3],($i+2)*4+7($inp)
- ___
- $code.=<<___ if (!($i&1) && $i<15);
- extql @X[$i],$inp,@X[$i]
- extqh @X[$i+1],$inp,@X[$i+1]
- or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched
- srl @X[$i],24,$t0 # vectorized byte swap
- srl @X[$i],8,$t2
- sll @X[$i],8,$t3
- sll @X[$i],24,@X[$i]
- zapnot $t0,0x11,$t0
- zapnot $t2,0x22,$t2
- zapnot @X[$i],0x88,@X[$i]
- or $t0,$t2,$t0
- zapnot $t3,0x44,$t3
- sll $a,5,$t1
- or @X[$i],$t0,@X[$i]
- addl $K,$e,$e
- and $b,$c,$t2
- zapnot $a,0xf,$a
- or @X[$i],$t3,@X[$i]
- srl $a,27,$t0
- bic $d,$b,$t3
- sll $b,30,$b
- extll @X[$i],4,@X[$i+1] # extract upper half
- or $t2,$t3,$t2
- addl @X[$i],$e,$e
- addl $t1,$e,$e
- srl $b,32,$t3
- zapnot @X[$i],0xf,@X[$i]
- addl $t0,$e,$e
- addl $t2,$e,$e
- or $t3,$b,$b
- ___
- $code.=<<___ if (($i&1) && $i<15);
- sll $a,5,$t1
- addl $K,$e,$e
- and $b,$c,$t2
- zapnot $a,0xf,$a
- srl $a,27,$t0
- addl @X[$i%16],$e,$e
- bic $d,$b,$t3
- sll $b,30,$b
- or $t2,$t3,$t2
- addl $t1,$e,$e
- srl $b,32,$t3
- zapnot @X[$i],0xf,@X[$i]
- addl $t0,$e,$e
- addl $t2,$e,$e
- or $t3,$b,$b
- ___
- $code.=<<___ if ($i>=15); # with forward Xupdate
- sll $a,5,$t1
- addl $K,$e,$e
- and $b,$c,$t2
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
- zapnot $a,0xf,$a
- addl @X[$i%16],$e,$e
- bic $d,$b,$t3
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
- srl $a,27,$t0
- addl $t1,$e,$e
- or $t2,$t3,$t2
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
- sll $b,30,$b
- addl $t0,$e,$e
- srl @X[$j%16],31,$t1
- addl $t2,$e,$e
- srl $b,32,$t3
- addl @X[$j%16],@X[$j%16],@X[$j%16]
- or $t3,$b,$b
- zapnot @X[$i%16],0xf,@X[$i%16]
- or $t1,@X[$j%16],@X[$j%16]
- ___
- }
- sub BODY_20_39 {
- my ($i,$a,$b,$c,$d,$e)=@_;
- my $j=$i+1;
- $code.=<<___ if ($i<79); # with forward Xupdate
- sll $a,5,$t1
- addl $K,$e,$e
- zapnot $a,0xf,$a
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
- sll $b,30,$t3
- addl $t1,$e,$e
- xor $b,$c,$t2
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
- srl $b,2,$b
- addl @X[$i%16],$e,$e
- xor $d,$t2,$t2
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
- srl @X[$j%16],31,$t1
- addl $t2,$e,$e
- srl $a,27,$t0
- addl @X[$j%16],@X[$j%16],@X[$j%16]
- or $t3,$b,$b
- addl $t0,$e,$e
- or $t1,@X[$j%16],@X[$j%16]
- ___
- $code.=<<___ if ($i<77);
- zapnot @X[$i%16],0xf,@X[$i%16]
- ___
- $code.=<<___ if ($i==79); # with context fetch
- sll $a,5,$t1
- addl $K,$e,$e
- zapnot $a,0xf,$a
- ldl @X[0],0($ctx)
- sll $b,30,$t3
- addl $t1,$e,$e
- xor $b,$c,$t2
- ldl @X[1],4($ctx)
- srl $b,2,$b
- addl @X[$i%16],$e,$e
- xor $d,$t2,$t2
- ldl @X[2],8($ctx)
- srl $a,27,$t0
- addl $t2,$e,$e
- ldl @X[3],12($ctx)
- or $t3,$b,$b
- addl $t0,$e,$e
- ldl @X[4],16($ctx)
- ___
- }
- sub BODY_40_59 {
- my ($i,$a,$b,$c,$d,$e)=@_;
- my $j=$i+1;
- $code.=<<___; # with forward Xupdate
- sll $a,5,$t1
- addl $K,$e,$e
- zapnot $a,0xf,$a
- xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
- srl $a,27,$t0
- and $b,$c,$t2
- and $b,$d,$t3
- xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
- sll $b,30,$b
- addl $t1,$e,$e
- xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
- srl @X[$j%16],31,$t1
- addl $t0,$e,$e
- or $t2,$t3,$t2
- and $c,$d,$t3
- or $t2,$t3,$t2
- srl $b,32,$t3
- addl @X[$i%16],$e,$e
- addl @X[$j%16],@X[$j%16],@X[$j%16]
- or $t3,$b,$b
- addl $t2,$e,$e
- or $t1,@X[$j%16],@X[$j%16]
- zapnot @X[$i%16],0xf,@X[$i%16]
- ___
- }
- $code=<<___;
- #ifdef __linux__
- #include <asm/regdef.h>
- #else
- #include <asm.h>
- #include <regdef.h>
- #endif
- .text
- .set noat
- .set noreorder
- .globl sha1_block_data_order
- .align 5
- .ent sha1_block_data_order
- sha1_block_data_order:
- lda sp,-64(sp)
- stq ra,0(sp)
- stq s0,8(sp)
- stq s1,16(sp)
- stq s2,24(sp)
- stq s3,32(sp)
- stq s4,40(sp)
- stq s5,48(sp)
- stq fp,56(sp)
- .mask 0x0400fe00,-64
- .frame sp,64,ra
- .prologue 0
- ldl $A,0($ctx)
- ldl $B,4($ctx)
- sll $num,6,$num
- ldl $C,8($ctx)
- ldl $D,12($ctx)
- ldl $E,16($ctx)
- addq $inp,$num,$num
- .Lloop:
- .set noreorder
- ldah $K,23170(zero)
- zapnot $B,0xf,$B
- lda $K,31129($K) # K_00_19
- ___
- for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
- $code.=<<___;
- ldah $K,28378(zero)
- lda $K,-5215($K) # K_20_39
- ___
- for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
- $code.=<<___;
- ldah $K,-28900(zero)
- lda $K,-17188($K) # K_40_59
- ___
- for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
- $code.=<<___;
- ldah $K,-13725(zero)
- lda $K,-15914($K) # K_60_79
- ___
- for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
- $code.=<<___;
- addl @X[0],$A,$A
- addl @X[1],$B,$B
- addl @X[2],$C,$C
- addl @X[3],$D,$D
- addl @X[4],$E,$E
- stl $A,0($ctx)
- stl $B,4($ctx)
- addq $inp,64,$inp
- stl $C,8($ctx)
- stl $D,12($ctx)
- stl $E,16($ctx)
- cmpult $inp,$num,$t1
- bne $t1,.Lloop
- .set noreorder
- ldq ra,0(sp)
- ldq s0,8(sp)
- ldq s1,16(sp)
- ldq s2,24(sp)
- ldq s3,32(sp)
- ldq s4,40(sp)
- ldq s5,48(sp)
- ldq fp,56(sp)
- lda sp,64(sp)
- ret (ra)
- .end sha1_block_data_order
- .ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
- .align 2
- ___
- $output=pop and open STDOUT,">$output";
- print $code;
- close STDOUT;
|