#! /usr/bin/env perl
# Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

  # Command line: <flavour> [<output file>].
  $flavour = shift;
  $output = shift;

  # Send the generated assembly to the requested file.  Without an output
  # argument STDOUT is left as inherited.  A file that cannot be opened is
  # a fatal error rather than a silent fall-back to the inherited STDOUT.
  if (defined($output) && length($output)) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }

  # ABI parameters.  A flavour matching /64/ selects the 2.0W build with
  # 8-byte register save slots; anything else selects the 1.0 build with
  # 4-byte slots.
  if ($flavour =~ /64/) {
      $LEVEL		="2.0W";	# value for the .LEVEL directive
      $SIZE_T		=8;		# size of one saved-register slot
      $FRAME_MARKER	=80;
      $SAVED_RP		=16;		# %r2 is saved at -$SAVED_RP(%sp)
      $PUSH		="std";		# store / store-and-modify mnemonics
      $PUSHMA		="std,ma";
      $POP		="ldd";		# load / load-and-modify mnemonics
      $POPMB		="ldd,mb";
  } else {
      $LEVEL		="1.0";
      $SIZE_T		=4;
      $FRAME_MARKER	=48;
      $SAVED_RP		=20;
      $PUSH		="stw";
      $PUSHMA		="stwm";
      $POP		="ldw";
      $POPMB		="ldwm";
  }

  $FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				# [+ argument transfer]

  # Register assignment.
  $ctx="%r26";		# arg0
  $inp="%r25";		# arg1
  $num="%r24";		# arg2

  $t0="%r28";		# scratch
  $t1="%r29";		# scratch
  $K="%r31";		# round constant

  # Sixteen message-schedule registers.  The 17th entry aliases $t0 and is
  # only used while shifting misaligned input into place.
  @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
      "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

  # Working variables; @V is rotated by the round loops, $A..$E keep
  # naming the registers in their initial order.
  @V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");
  # BODY_00_19($i,$a,$b,$c,$d,$e) - append the instructions for round $i
  # (0..19) to $code.  $a..$e are the registers currently holding the five
  # working variables.  The emitted code adds $K, the 5-bit left rotation
  # of $a, schedule word $i and ($b & $c) | ($d & ~$b) to $e, and rotates
  # $b by 30 bits.  From round 15 on it also computes the schedule word for
  # round $i+1 in place, interleaved with the round itself.
  sub BODY_00_19 {
      my ($i,$va,$vb,$vc,$vd,$ve)=@_;

      if ($i<15) {
          # The schedule word is still one of the loaded input words.
          my $w=$X[$i];

          $code.=<<___;
	addl $K,$ve,$ve ; $i
	shd $va,$va,27,$t1
	addl $w,$ve,$ve
	and $vc,$vb,$t0
	addl $t1,$ve,$ve
	andcm $vd,$vb,$t1
	shd $vb,$vb,2,$vb
	or $t1,$t0,$t0
	addl $t0,$ve,$ve
___
      }
      else {
          # Forward Xupdate: the next word is XORed with the words 2, 8
          # and 13 slots ahead of it in the 16-entry ring, then rotated
          # left by one bit.
          my $j=$i+1;
          my $w=$X[$i%16];
          my ($xn,$x2,$x8,$x13)=map { $X[($j+$_)%16] } (0,2,8,13);

          $code.=<<___;
	addl $K,$ve,$ve ; $i
	shd $va,$va,27,$t1
	xor $x2,$xn,$xn
	addl $w,$ve,$ve
	and $vc,$vb,$t0
	xor $x8,$xn,$xn
	addl $t1,$ve,$ve
	andcm $vd,$vb,$t1
	shd $vb,$vb,2,$vb
	or $t1,$t0,$t0
	xor $x13,$xn,$xn
	add $t0,$ve,$ve
	shd $xn,$xn,31,$xn
___
      }
  }
  # BODY_20_39($i,$a,$b,$c,$d,$e) - append the instructions for one
  # "parity" round ($b ^ $c ^ $d) to $code.  The generator calls it for
  # rounds 20..39 and again for rounds 60..79.  Every round before 79 also
  # computes the schedule word for round $i+1 in place.  Round 79 needs no
  # further schedule word and instead loads the five context words from
  # $ctx into the first five schedule registers.  Nothing is emitted for
  # $i above 79.
  sub BODY_20_39 {
      my ($i,$va,$vb,$vc,$vd,$ve)=@_;
      my $w=$X[$i%16];

      if ($i<79) {
          my $j=$i+1;
          my ($xn,$x2,$x8,$x13)=map { $X[($j+$_)%16] } (0,2,8,13);

          $code.=<<___;
	xor $x2,$xn,$xn ; $i
	addl $K,$ve,$ve
	shd $va,$va,27,$t1
	xor $x8,$xn,$xn
	addl $w,$ve,$ve
	xor $vb,$vc,$t0
	xor $x13,$xn,$xn
	addl $t1,$ve,$ve
	shd $vb,$vb,2,$vb
	xor $vd,$t0,$t0
	shd $xn,$xn,31,$xn
	addl $t0,$ve,$ve
___
      }
      elsif ($i==79) {
          # Last round, with the context load interleaved.
          $code.=<<___;
	ldw 0($ctx),$X[0] ; $i
	addl $K,$ve,$ve
	shd $va,$va,27,$t1
	ldw 4($ctx),$X[1]
	addl $w,$ve,$ve
	xor $vb,$vc,$t0
	ldw 8($ctx),$X[2]
	addl $t1,$ve,$ve
	shd $vb,$vb,2,$vb
	xor $vd,$t0,$t0
	ldw 12($ctx),$X[3]
	addl $t0,$ve,$ve
	ldw 16($ctx),$X[4]
___
      }
  }
  # BODY_40_59($i,$a,$b,$c,$d,$e) - append the instructions for round $i
  # (40..59) to $code.  The round function is accumulated into $e in two
  # parts, ($b & ($c ^ $d)) and ($c & $d).  The schedule word for round
  # $i+1 is computed in place, interleaved with the round.
  sub BODY_40_59 {
      my ($i,$va,$vb,$vc,$vd,$ve)=@_;
      my $j=$i+1;
      my $w=$X[$i%16];
      my ($xn,$x2,$x8,$x13)=map { $X[($j+$_)%16] } (0,2,8,13);

      $code.=<<___;
	shd $va,$va,27,$t1 ; $i
	addl $K,$ve,$ve
	xor $x2,$xn,$xn
	xor $vd,$vc,$t0
	addl $w,$ve,$ve
	xor $x8,$xn,$xn
	and $vb,$t0,$t0
	addl $t1,$ve,$ve
	shd $vb,$vb,2,$vb
	xor $x13,$xn,$xn
	addl $t0,$ve,$ve
	and $vd,$vc,$t1
	shd $xn,$xn,31,$xn
	addl $t1,$ve,$ve
___
  }
  # Start of the generated module: assembler directives, entry point and
  # the first two register saves.  Arithmetic between backticks is left in
  # the text here and evaluated when the code is finally printed.
  $code=<<___;
	.LEVEL $LEVEL
	.SPACE \$TEXT\$
	.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
	.EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
	$PUSHMA %r3,$FRAME(%sp)
___

  # %r4..%r16 are saved in slots 1..13 of the new frame.
  foreach my $slot (1..13) {
      $code.="\t$PUSH %r".($slot+3).",`-$FRAME+$slot*$SIZE_T`(%sp)\n";
  }

  # Load the five working variables from the context.
  {
      my $off=0;
      foreach my $reg ($A,$B,$C,$D,$E) {
          $code.="\tldw $off($ctx),$reg\n";
          $off+=4;
      }
  }

  # Set the shift amount used for misaligned input, then open the
  # per-block loop by rounding the input pointer down to a word boundary.
  $code.=<<___;
	extru $inp,31,2,$t0 ; t0=inp&3;
	sh3addl $t0,%r0,$t0 ; t0*=8;
	subi 32,$t0,$t0 ; t0=32-t0;
	mtctl $t0,%cr11 ; %sar=t0;
L\$oop
	ldi 3,$t0
	andcm $inp,$t0,$t0 ; 64-bit neutral
___

  # load input block
  foreach my $n (0..14) {
      $code.="\tldw `4*$n`($t0),$X[$n]\n";
  }

  # The 16th word is loaded in the line following the branch.  The 17th
  # word is only fetched on the misaligned path; its register is $t0
  # itself, so it has to be the last load through $t0.
  $code.=<<___;
	cmpb,*= $inp,$t0,L\$aligned
	ldw 60($t0),$X[15]
	ldw 64($t0),$X[16]
___

  # align input
  foreach my $n (0..15) {
      $code.="\tvshd $X[$n],$X[$n+1],$X[$n]\n";
  }
  # Target of the aligned-input branch; the 80 rounds start here.
  $code.="L\$aligned\n";

  # Each stage loads its round constant into $K (upper part with ldil,
  # remainder with ldo) and then emits 20 rounds.  @V is rotated after
  # every round so the five working registers change roles instead of
  # being copied.
  foreach my $stage (
      [ 0,19,"0x5a827000","0x999","K_00_19",\&BODY_00_19],
      [20,39,"0x6ed9e000","0xba1","K_20_39",\&BODY_20_39],
      [40,59,"0x8f1bb000","0xcdc","K_40_59",\&BODY_40_59],
      [60,79,"0xca62c000","0x1d6","K_60_79",\&BODY_20_39],
  ) {
      my ($first,$last,$hi,$lo,$name,$body)=@$stage;

      $code.="\tldil L'$hi,$K ; $name\n";
      $code.="\tldo $lo($K),$K\n";

      foreach my $round ($first..$last) {
          $body->($round,@V);
          unshift(@V,pop(@V));
      }
  }

  # Round 79 left the previous context words in the first five schedule
  # registers: add them to the working variables and store the result.
  {
      my @state=($A,$B,$C,$D,$E);

      foreach my $n (0..4) {
          $code.="\taddl $X[$n],$state[$n],$state[$n]\n";
      }
      foreach my $n (0..4) {
          $code.="\tstw $state[$n],".(4*$n)."($ctx)\n";
      }
  }

  # Loop while blocks remain; the input pointer is advanced in the line
  # following the branch.  Then start restoring the saved registers.
  $code.=<<___;
	addib,*<> -1,$num,L\$oop
	ldo 64($inp),$inp
	$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
___

  # %r4..%r16 come back from slots 1..13 of the frame.
  foreach my $slot (1..13) {
      $code.="\t$POP `-$FRAME+$slot*$SIZE_T`(%sp),%r".($slot+3)."\n";
  }

  # Return; %r3 is restored and the frame released in the line following
  # the branch.
  $code.=<<___;
	bv (%r2)
	.EXIT
	$POPMB -$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
  # Ask the compiler driver named by $ENV{CC} which assembler it uses.
  # The probe is skipped when CC is not set, which leaves $gnuas unset
  # just as a failed probe does.
  if (defined($ENV{CC}) && length($ENV{CC})
      && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
         =~ /GNU assembler/) {
      $gnuas = 1;
  }

  # Post-process the generated text line by line and print it.
  foreach (split("\n",$code)) {
      # Evaluate the arithmetic left between backticks.
      s/\`([^\`]*)\`/eval $1/ge;

      # Directive spellings for GNU as in the 64-bit build.
      s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
      s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
      s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);

      # The 32-bit build drops the "*" from ",*" completers; the 64-bit
      # build returns through bve instead of bv.
      s/,\*/,/			if ($SIZE_T==4);
      s/\bbv\b/bve/			if ($SIZE_T==8);

      print $_,"\n";
  }

  # Buffered write errors only surface at close; do not let a truncated
  # output file pass as success.
  close(STDOUT) or die "error closing STDOUT: $!";