sha1-parisc.pl 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. #! /usr/bin/env perl
  2. # Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  10. # project. The module is, however, dual licensed under OpenSSL and
  11. # CRYPTOGAMS licenses depending on where you obtain it. For further
  12. # details see http://www.openssl.org/~appro/cryptogams/.
  13. # ====================================================================
  14. # SHA1 block procedure for PA-RISC.
  15. # June 2009.
  16. #
  17. # On PA-7100LC performance is >30% better than gcc 3.2 generated code
  18. # for aligned input and >50% better for unaligned input. Compared to the
  19. # vendor compiler on PA-8600 it's almost 60% faster in a 64-bit build and
  20. # just a few percent faster in a 32-bit one (this is for aligned input;
  21. # data for unaligned input is not available).
  22. #
  23. # Special thanks to polarhome.com for providing an HP-UX account.
  # Command-line handling:
  #   $output  is the last argument if it looks like a file (it has an extension)
  #   $flavour is the first argument if it doesn't look like a file
  # Either one may be absent, in which case it is left undefined.
  $output  = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop   : undef;
  $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.|          ? shift : undef;

  # Send the generated assembly to $output when one was given.  The
  # three-argument form keeps the file name from being interpreted as part
  # of the open mode, and a failed open aborts the run instead of silently
  # writing the assembly to whatever STDOUT happened to be.
  if ($output) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }
  # ABI-dependent parameters, chosen by whether $flavour mentions "64":
  #   $LEVEL         argument of the .LEVEL directive
  #   $SIZE_T        size in bytes of one saved register slot
  #   $FRAME_MARKER  bytes added to the frame on top of the register save area
  #   $SAVED_RP      distance below %sp at which %r2 is stored in the prologue
  #   $PUSH/$PUSHMA  store / store-and-modify-base mnemonics
  #   $POP/$POPMB    load / load-and-modify-base mnemonics
  ($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
      ($flavour =~ /64/)
      ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
      : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");

  # 14 saved regs + frame marker
  #                [+ argument transfer]
  $FRAME = 14*$SIZE_T + $FRAME_MARKER;
  # Register allocation.
  ($ctx, $inp, $num) = ("%r26", "%r25", "%r24");    # arg0, arg1, arg2
  ($t0, $t1)         = ("%r28", "%r29");            # scratch
  $K                 = "%r31";                      # current round constant

  # Message words live in %r1..%r16; a 17th slot aliases $t0 and is used
  # only while re-aligning unaligned input.
  @X = ((map { "%r$_" } 1 .. 16), $t0);

  # Working variables a..e; @V is rotated by one position after each round.
  ($A, $B, $C, $D, $E) = map { "%r$_" } 19 .. 23;
  @V = ($A, $B, $C, $D, $E);
  # Append one round of the first SHA-1 stage (rounds 0..19) to $code.
  #
  # Arguments: the round number followed by the names of the registers that
  # currently hold a, b, c, d and e.  The emitted instructions compute
  #     e += K + X[round] + rot(a) + ((b & c) | (d & ~b));  b = rot(b)
  # where "shd r,r,27" and "shd r,r,2" rotate a register right by 27 and 2
  # bits.  From round 15 on, the schedule update for the *next* round's word
  # (three xors and a rotate by one) is interleaved with the round itself.
  sub BODY_00_19 {
      my ($round,$a,$b,$c,$d,$e)=@_;
      my $next=$round+1;

      # Message word consumed by this round, then the slot being updated
      # for the next round together with the three words xor-ed into it.
      my $w_now = $X[$round%16];
      my ($w, $w2, $w8, $w13) = @X[map { ($next+$_)%16 } (0, 2, 8, 13)];

      if ($round<15) {
          $code.=<<___;
	addl $K,$e,$e ; $round
	shd $a,$a,27,$t1
	addl $w_now,$e,$e
	and $c,$b,$t0
	addl $t1,$e,$e
	andcm $d,$b,$t1
	shd $b,$b,2,$b
	or $t1,$t0,$t0
	addl $t0,$e,$e
___
      } else {                                  # with forward Xupdate
          $code.=<<___;
	addl $K,$e,$e ; $round
	shd $a,$a,27,$t1
	xor $w2,$w,$w
	addl $w_now,$e,$e
	and $c,$b,$t0
	xor $w8,$w,$w
	addl $t1,$e,$e
	andcm $d,$b,$t1
	shd $b,$b,2,$b
	or $t1,$t0,$t0
	xor $w13,$w,$w
	add $t0,$e,$e
	shd $w,$w,31,$w
___
      }
  }
  # Append one round that uses the parity function (b ^ c ^ d) to $code.
  # The main program calls this for rounds 20..39 and again for 60..79.
  #
  # Arguments: the round number followed by the names of the registers that
  # currently hold a, b, c, d and e.  For rounds before 79 the schedule
  # update for the next round's word is interleaved with the round.  Round
  # 79 has no next word; its free slots are used instead to load the five
  # context words from $ctx into X[0..4].
  sub BODY_20_39 {
      my ($round,$a,$b,$c,$d,$e)=@_;
      my $next=$round+1;

      # Message word consumed by this round, then the slot being updated
      # for the next round together with the three words xor-ed into it.
      my $w_now = $X[$round%16];
      my ($w, $w2, $w8, $w13) = @X[map { ($next+$_)%16 } (0, 2, 8, 13)];

      if ($round<79) {
          $code.=<<___;
	xor $w2,$w,$w ; $round
	addl $K,$e,$e
	shd $a,$a,27,$t1
	xor $w8,$w,$w
	addl $w_now,$e,$e
	xor $b,$c,$t0
	xor $w13,$w,$w
	addl $t1,$e,$e
	shd $b,$b,2,$b
	xor $d,$t0,$t0
	shd $w,$w,31,$w
	addl $t0,$e,$e
___
      } elsif ($round==79) {                    # with context load
          $code.=<<___;
	ldw 0($ctx),$X[0] ; $round
	addl $K,$e,$e
	shd $a,$a,27,$t1
	ldw 4($ctx),$X[1]
	addl $w_now,$e,$e
	xor $b,$c,$t0
	ldw 8($ctx),$X[2]
	addl $t1,$e,$e
	shd $b,$b,2,$b
	xor $d,$t0,$t0
	ldw 12($ctx),$X[3]
	addl $t0,$e,$e
	ldw 16($ctx),$X[4]
___
      }
  }
  # Append one round of the third SHA-1 stage (rounds 40..59) to $code.
  #
  # Arguments: the round number followed by the names of the registers that
  # currently hold a, b, c, d and e.  The majority function is accumulated
  # into e as two separate terms, (b & (c ^ d)) and (c & d), and the
  # schedule update for the next round's word is interleaved with the round.
  sub BODY_40_59 {
      my ($round,$a,$b,$c,$d,$e)=@_;
      my $next=$round+1;

      # Message word consumed by this round, then the slot being updated
      # for the next round together with the three words xor-ed into it.
      my $w_now = $X[$round%16];
      my ($w, $w2, $w8, $w13) = @X[map { ($next+$_)%16 } (0, 2, 8, 13)];

      $code.=<<___;
	shd $a,$a,27,$t1 ; $round
	addl $K,$e,$e
	xor $w2,$w,$w
	xor $d,$c,$t0
	addl $w_now,$e,$e
	xor $w8,$w,$w
	and $b,$t0,$t0
	addl $t1,$e,$e
	shd $b,$b,2,$b
	xor $w13,$w,$w
	addl $t0,$e,$e
	and $d,$c,$t1
	shd $w,$w,31,$w
	addl $t1,$e,$e
___
  }
  # Build the text of sha1_block_data_order in $code.  Expressions between
  # backticks are left in the text on purpose; the output loop at the end
  # of the file evaluates them.

  # Section header, entry point, and the start of the prologue: %r2 is
  # stored below %sp and %r3 is stored while %sp is advanced by $FRAME.
  $code=<<___;
	.LEVEL $LEVEL
	.SPACE \$TEXT\$
	.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
	.EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
	$PUSHMA %r3,$FRAME(%sp)
___

  # %r4..%r16 go into save slots 1..13 of the new frame.
  foreach my $slot (1 .. 13) {
      my $reg = "%r" . ($slot + 3);
      $code .= "\t$PUSH $reg,`-$FRAME+$slot*$SIZE_T`(%sp)\n";
  }

  # Load the five state words, then program the shift amount used by vshd
  # from the low two bits of the input pointer.  L$oop is the per-block
  # loop head; $t0 becomes the input pointer rounded down to a word.
  $code.=<<___;
	ldw 0($ctx),$A
	ldw 4($ctx),$B
	ldw 8($ctx),$C
	ldw 12($ctx),$D
	ldw 16($ctx),$E
	extru $inp,31,2,$t0 ; t0=inp&3;
	sh3addl $t0,%r0,$t0 ; t0*=8;
	subi 32,$t0,$t0 ; t0=32-t0;
	mtctl $t0,%cr11 ; %sar=t0;
L\$oop
	ldi 3,$t0
	andcm $inp,$t0,$t0 ; 64-bit neutral
___

  # load input block
  foreach my $word (0 .. 14) {
      $code .= "\tldw `4*$word`($t0),$X[$word]\n";
  }

  # The 16th word is loaded in the slot right after the branch.  When the
  # pointer was not word-aligned a 17th word is fetched as well; that load
  # targets X[16], which is $t0 itself, so it has to be the last one.
  $code.=<<___;
	cmpb,*= $inp,$t0,L\$aligned
	ldw 60($t0),$X[15]
	ldw 64($t0),$X[16]
___

  # align input
  foreach my $word (0 .. 15) {
      my ($this, $following) = @X[$word, $word + 1];
      $code .= "\tvshd $this,$following,$this\n";
  }

  $code .= "L\$aligned\n";

  # The four 20-round stages: the round constant (split into the ldil and
  # ldo parts), the label used in the assembly comment, and the generator
  # that emits each round.
  my @stages = (
      [ "0x5a827000", "0x999", "K_00_19", \&BODY_00_19 ],
      [ "0x6ed9e000", "0xba1", "K_20_39", \&BODY_20_39 ],
      [ "0x8f1bb000", "0xcdc", "K_40_59", \&BODY_40_59 ],
      [ "0xca62c000", "0x1d6", "K_60_79", \&BODY_20_39 ],
  );
  my $round = 0;
  foreach my $stage (@stages) {
      my ($k_high, $k_low, $k_name, $emit_round) = @$stage;

      $code .= "\tldil L'$k_high,$K ; $k_name\n";
      $code .= "\tldo $k_low($K),$K\n";

      foreach (1 .. 20) {
          $emit_round->($round++, @V);
          @V = @V[4, 0 .. 3];             # last register name moves to the front
      }
  }

  # Round 79 left the previous state in X[0..4]: add it in, store the new
  # state and loop while blocks remain.  The input pointer is advanced in
  # the slot right after the loop branch.
  $code.=<<___;
	addl $X[0],$A,$A
	addl $X[1],$B,$B
	addl $X[2],$C,$C
	addl $X[3],$D,$D
	addl $X[4],$E,$E
	stw $A,0($ctx)
	stw $B,4($ctx)
	stw $C,8($ctx)
	stw $D,12($ctx)
	stw $E,16($ctx)
	addib,*<> -1,$num,L\$oop
	ldo 64($inp),$inp
	$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
___

  # Reload %r4..%r16 from the slots used by the prologue.
  foreach my $slot (1 .. 13) {
      my $reg = "%r" . ($slot + 3);
      $code .= "\t$POP `-$FRAME+$slot*$SIZE_T`(%sp),$reg\n";
  }

  # Return; %r3 is reloaded and %sp moved back by $FRAME in the slot after
  # the branch.
  $code.=<<___;
	bv (%r2)
	.EXIT
	$POPMB -$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
  # Ask the compiler named by $ENV{CC} which assembler it drives.  The probe
  # is skipped when CC is not set, since there is nothing to run then.
  if (defined($ENV{CC}) && $ENV{CC} ne ""
      && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
         =~ /GNU assembler/) {
      $gnuas = 1;
  }

  # Post-process the generated text line by line and print it.
  foreach (split("\n",$code)) {
      # Replace each `expression` by its value.
      s/\`([^\`]*)\`/eval $1/ge;

      # GNU assembler, 64-bit build: lower-case the W of ".LEVEL 2.0W",
      # use .text in place of the .SPACE directive, and drop .SUBSPA.
      s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
      s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
      s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);

      # 32-bit build: remove the "*" from ",*" completers.
      s/,\*/,/				if ($SIZE_T==4);

      # 64-bit build: use bve in place of bv.
      s/\bbv\b/bve/			if ($SIZE_T==8);

      print $_,"\n";
  }

  # Buffered output is flushed here, so this is where a write error (for
  # example a full disk) shows up; fail loudly rather than leave a truncated
  # assembly file behind.
  close STDOUT or die "error closing STDOUT: $!";