123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- #! /usr/bin/env perl
- # Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
- #
- # Licensed under the Apache License 2.0 (the "License"). You may not use
- # this file except in compliance with the License. You can obtain a copy
- # in the file LICENSE in the source distribution or at
- # https://www.openssl.org/source/license.html
- # ====================================================================
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
- # project. The module is, however, dual licensed under OpenSSL and
- # CRYPTOGAMS licenses depending on where you obtain it. For further
- # details see http://www.openssl.org/~appro/cryptogams/.
- # ====================================================================
- # RC4 for PA-RISC.
- # June 2009.
- #
- # Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
- # For reference, [4x] unrolled loop is >40% faster than folded one.
- # It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
- # is believed to be not sufficient to justify the effort...
- #
- # Special thanks to polarhome.com for providing HP-UX account.
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
- # $output is the last argument if it looks like a file (it has an extension)
- # $flavour is the first argument if it doesn't look like a file
- $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
- $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
- $output and open STDOUT,">$output";
- if ($flavour =~ /64/) {
- $LEVEL ="2.0W";
- $SIZE_T =8;
- $FRAME_MARKER =80;
- $SAVED_RP =16;
- $PUSH ="std";
- $PUSHMA ="std,ma";
- $POP ="ldd";
- $POPMB ="ldd,mb";
- } else {
- $LEVEL ="1.0";
- $SIZE_T =4;
- $FRAME_MARKER =48;
- $SAVED_RP =20;
- $PUSH ="stw";
- $PUSHMA ="stwm";
- $POP ="ldw";
- $POPMB ="ldwm";
- }
- $FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
- # [+ argument transfer]
- $SZ=1; # defaults to RC4_CHAR
- if (open CONF,"<${dir}../../opensslconf.h") {
- while(<CONF>) {
- if (m/#\s*define\s+RC4_INT\s+(.*)/) {
- $SZ = ($1=~/char$/) ? 1 : 4;
- last;
- }
- }
- close CONF;
- }
- if ($SZ==1) { # RC4_CHAR
- $LD="ldb";
- $LDX="ldbx";
- $MKX="addl";
- $ST="stb";
- } else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
- $LD="ldw";
- $LDX="ldwx,s";
- $MKX="sh2addl";
- $ST="stw";
- }
- $key="%r26";
- $len="%r25";
- $inp="%r24";
- $out="%r23";
- @XX=("%r19","%r20");
- @TX=("%r21","%r22");
- $YY="%r28";
- $TY="%r29";
- $acc="%r1";
- $ix="%r2";
- $iy="%r3";
- $dat0="%r4";
- $dat1="%r5";
- $rem="%r6";
- $mask="%r31";
- sub unrolledloopbody {
- for ($i=0;$i<4;$i++) {
- $code.=<<___;
- ldo 1($XX[0]),$XX[1]
- `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
- and $mask,$XX[1],$XX[1]
- $LDX $YY($key),$TY
- $MKX $YY,$key,$ix
- $LDX $XX[1]($key),$TX[1]
- $MKX $XX[0],$key,$iy
- $ST $TX[0],0($ix)
- comclr,<> $XX[1],$YY,%r0 ; conditional
- copy $TX[0],$TX[1] ; move
- `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
- $ST $TY,0($iy)
- addl $TX[0],$TY,$TY
- addl $TX[1],$YY,$YY
- and $mask,$TY,$TY
- and $mask,$YY,$YY
- ___
- push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
- } }
- sub foldedloop {
- my ($label,$count)=@_;
- $code.=<<___;
- $label
- $MKX $YY,$key,$iy
- $LDX $YY($key),$TY
- $MKX $XX[0],$key,$ix
- $ST $TX[0],0($iy)
- ldo 1($XX[0]),$XX[0]
- $ST $TY,0($ix)
- addl $TX[0],$TY,$TY
- ldbx $inp($out),$dat1
- and $mask,$TY,$TY
- and $mask,$XX[0],$XX[0]
- $LDX $TY($key),$acc
- $LDX $XX[0]($key),$TX[0]
- ldo 1($out),$out
- xor $dat1,$acc,$acc
- addl $TX[0],$YY,$YY
- stb $acc,-1($out)
- addib,<> -1,$count,$label ; $count is always small
- and $mask,$YY,$YY
- ___
- }
- $code=<<___;
- .LEVEL $LEVEL
- .SPACE \$TEXT\$
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
- .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
- RC4
- .PROC
- .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
- cmpib,*= 0,$len,L\$abort
- sub $inp,$out,$inp ; distance between $inp and $out
- $LD `0*$SZ`($key),$XX[0]
- $LD `1*$SZ`($key),$YY
- ldo `2*$SZ`($key),$key
- ldi 0xff,$mask
- ldi 3,$dat0
- ldo 1($XX[0]),$XX[0] ; warm up loop
- and $mask,$XX[0],$XX[0]
- $LDX $XX[0]($key),$TX[0]
- addl $TX[0],$YY,$YY
- cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
- and $mask,$YY,$YY
- and,<> $out,$dat0,$rem ; is $out aligned?
- b L\$alignedout
- subi 4,$rem,$rem
- sub $len,$rem,$len
- ___
- &foldedloop("L\$alignout",$rem); # process till $out is aligned
- $code.=<<___;
- L\$alignedout ; $len is at least 4 here
- and,<> $inp,$dat0,$acc ; is $inp aligned?
- b L\$oop4
- sub $inp,$acc,$rem ; align $inp
- sh3addl $acc,%r0,$acc
- subi 32,$acc,$acc
- mtctl $acc,%cr11 ; load %sar with vshd align factor
- ldwx $rem($out),$dat0
- ldo 4($rem),$rem
- L\$oop4misalignedinp
- ___
- &unrolledloopbody();
- $code.=<<___;
- $LDX $TY($key),$ix
- ldwx $rem($out),$dat1
- ldo -4($len),$len
- or $ix,$acc,$acc ; last piece, no need to dep
- vshd $dat0,$dat1,$iy ; align data
- copy $dat1,$dat0
- xor $iy,$acc,$acc
- stw $acc,0($out)
- cmpib,*<< 3,$len,L\$oop4misalignedinp
- ldo 4($out),$out
- cmpib,*= 0,$len,L\$done
- nop
- b L\$oop1
- nop
- .ALIGN 8
- L\$oop4
- ___
- &unrolledloopbody();
- $code.=<<___;
- $LDX $TY($key),$ix
- ldwx $inp($out),$dat0
- ldo -4($len),$len
- or $ix,$acc,$acc ; last piece, no need to dep
- xor $dat0,$acc,$acc
- stw $acc,0($out)
- cmpib,*<< 3,$len,L\$oop4
- ldo 4($out),$out
- cmpib,*= 0,$len,L\$done
- nop
- ___
- &foldedloop("L\$oop1",$len);
- $code.=<<___;
- L\$done
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2
- ldo -1($XX[0]),$XX[0] ; chill out loop
- sub $YY,$TX[0],$YY
- and $mask,$XX[0],$XX[0]
- and $mask,$YY,$YY
- $ST $XX[0],`-2*$SZ`($key)
- $ST $YY,`-1*$SZ`($key)
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
- L\$abort
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
- ___
- $code.=<<___;
- .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
- .ALIGN 8
- RC4_set_key
- .PROC
- .CALLINFO NO_CALLS
- .ENTRY
- $ST %r0,`0*$SZ`($key)
- $ST %r0,`1*$SZ`($key)
- ldo `2*$SZ`($key),$key
- copy %r0,@XX[0]
- L\$1st
- $ST @XX[0],0($key)
- ldo 1(@XX[0]),@XX[0]
- bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
- ldo $SZ($key),$key
- ldo `-256*$SZ`($key),$key ; rewind $key
- addl $len,$inp,$inp ; $inp to point at the end
- sub %r0,$len,%r23 ; inverse index
- copy %r0,@XX[0]
- copy %r0,@XX[1]
- ldi 0xff,$mask
- L\$2nd
- $LDX @XX[0]($key),@TX[0]
- ldbx %r23($inp),@TX[1]
- addi,nuv 1,%r23,%r23 ; increment and conditional
- sub %r0,$len,%r23 ; inverse index
- addl @TX[0],@XX[1],@XX[1]
- addl @TX[1],@XX[1],@XX[1]
- and $mask,@XX[1],@XX[1]
- $MKX @XX[0],$key,$TY
- $LDX @XX[1]($key),@TX[1]
- $MKX @XX[1],$key,$YY
- ldo 1(@XX[0]),@XX[0]
- $ST @TX[0],0($YY)
- bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
- $ST @TX[1],0($TY)
- bv,n (%r2)
- .EXIT
- nop
- .PROCEND
- .EXPORT RC4_options,ENTRY
- .ALIGN 8
- RC4_options
- .PROC
- .CALLINFO NO_CALLS
- .ENTRY
- blr %r0,%r28
- ldi 3,%r1
- L\$pic
- andcm %r28,%r1,%r28
- bv (%r2)
- .EXIT
- ldo L\$opts-L\$pic(%r28),%r28
- .PROCEND
- .ALIGN 8
- L\$opts
- .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
- .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
- ___
- if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
- =~ /GNU assembler/) {
- $gnuas = 1;
- }
- foreach(split("\n",$code)) {
- s/\`([^\`]*)\`/eval $1/ge;
- s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
- s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
- s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
- s/cmpib,\*/comib,/ if ($SIZE_T==4);
- s/\bbv\b/bve/ if ($SIZE_T==8);
- print $_,"\n";
- }
- close STDOUT or die "error closing STDOUT: $!";
|