2
0

sha1-alpha.pl 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. #! /usr/bin/env perl
  2. # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. # ====================================================================
  9. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  10. # project. The module is, however, dual licensed under OpenSSL and
  11. # CRYPTOGAMS licenses depending on where you obtain it. For further
  12. # details see http://www.openssl.org/~appro/cryptogams/.
  13. # ====================================================================
  14. # SHA1 block procedure for Alpha.
  15. # On 21264 performance is 33% better than code generated by vendor
  16. # compiler, and 75% better than GCC [3.4], and in absolute terms is
  17. # 8.7 cycles per processed byte. Implementation features vectorized
  18. # byte swap, but not Xupdate.
  # Register allocation for the generated Alpha code.
  #
  # @X holds the sixteen registers used for the message-schedule window,
  # written in the assembler's numeric form ($0 .. $15).
  @X = map { "\$$_" } (0 .. 15);

  # Arguments of sha1_block_data_order, in order.
  ($ctx, $inp, $num) = ("a0", "a1", "a2");	# a0 is $16

  # The five SHA-1 working variables.  @V lists them in the order they are
  # handed to the BODY_* generators.
  ($A, $B, $C, $D, $E) = ("a3", "a4", "a5", "t8", "t9");	# a4 is $20
  @V = ($A, $B, $C, $D, $E);

  # Scratch registers.  Note that $t2 is "ra".
  ($t0, $t1, $t2, $t3) = ("t10", "t11", "ra", "t12");	# t10 is $24

  # Register holding the round constant of the current group of rounds.
  $K = "AT";	# $28
  # Append the assembly for one round of the first group (rounds 0..19) to
  # the global $code.
  #
  # Arguments: the round number followed by the five register names that play
  # the roles a, b, c, d, e in this round.
  #
  # What is emitted depends on the round number:
  #   - round 0 additionally issues the first two unaligned loads;
  #   - even rounds below 14 issue the unaligned loads for a later pair;
  #   - even rounds below 15 merge the loaded quadwords, byte-swap the pair of
  #     32-bit words and split them into two @X registers, interleaved with
  #     the round itself;
  #   - odd rounds below 15 emit the plain round;
  #   - rounds from 15 on emit the round interleaved with the "forward
  #     Xupdate" of the next schedule word.
  sub BODY_00_19 {
      my ($i, $a, $b, $c, $d, $e) = @_;
      my $j = $i + 1;

      # @X is used as a ring of sixteen registers.
      my $Xcur = $X[$i % 16];		# word consumed by this round
      my $Xnxt = $X[$j % 16];		# word for the following round
      my ($Xp2, $Xp8, $Xp13) = map { $X[($j + $_) % 16] } (2, 8, 13);

      my $is_odd = $i & 1;

      if ($i == 0) {
          $code .= <<___;
ldq_u $X[0],0+0($inp)
ldq_u $X[1],0+7($inp)
___
      }

      if (!$is_odd && $i < 14) {
          # The offset arithmetic is left in the output as text.
          $code .= <<___;
ldq_u $X[$i+2],($i+2)*4+0($inp)
ldq_u $X[$i+3],($i+2)*4+7($inp)
___
      }

      if ($i >= 15) {
          # with forward Xupdate
          $code .= <<___;
sll $a,5,$t1
addl $K,$e,$e
and $b,$c,$t2
xor $Xp2,$Xnxt,$Xnxt
zapnot $a,0xf,$a
addl $Xcur,$e,$e
bic $d,$b,$t3
xor $Xp8,$Xnxt,$Xnxt
srl $a,27,$t0
addl $t1,$e,$e
or $t2,$t3,$t2
xor $Xp13,$Xnxt,$Xnxt
sll $b,30,$b
addl $t0,$e,$e
srl $Xnxt,31,$t1
addl $t2,$e,$e
srl $b,32,$t3
addl $Xnxt,$Xnxt,$Xnxt
or $t3,$b,$b
zapnot $Xcur,0xf,$Xcur
or $t1,$Xnxt,$Xnxt
___
      }
      elsif ($is_odd) {
          $code .= <<___;
sll $a,5,$t1
addl $K,$e,$e
and $b,$c,$t2
zapnot $a,0xf,$a
srl $a,27,$t0
addl $Xcur,$e,$e
bic $d,$b,$t3
sll $b,30,$b
or $t2,$t3,$t2
addl $t1,$e,$e
srl $b,32,$t3
zapnot $Xcur,0xf,$Xcur
addl $t0,$e,$e
addl $t2,$e,$e
or $t3,$b,$b
___
      }
      else {
          $code .= <<___;
extql $Xcur,$inp,$Xcur
extqh $Xnxt,$inp,$Xnxt
or $Xnxt,$Xcur,$Xcur # pair of 32-bit values are fetched
srl $Xcur,24,$t0 # vectorized byte swap
srl $Xcur,8,$t2
sll $Xcur,8,$t3
sll $Xcur,24,$Xcur
zapnot $t0,0x11,$t0
zapnot $t2,0x22,$t2
zapnot $Xcur,0x88,$Xcur
or $t0,$t2,$t0
zapnot $t3,0x44,$t3
sll $a,5,$t1
or $Xcur,$t0,$Xcur
addl $K,$e,$e
and $b,$c,$t2
zapnot $a,0xf,$a
or $Xcur,$t3,$Xcur
srl $a,27,$t0
bic $d,$b,$t3
sll $b,30,$b
extll $Xcur,4,$Xnxt # extract upper half
or $t2,$t3,$t2
addl $Xcur,$e,$e
addl $t1,$e,$e
srl $b,32,$t3
zapnot $Xcur,0xf,$Xcur
addl $t0,$e,$e
addl $t2,$e,$e
or $t3,$b,$b
___
      }

      return;
  }
  # Append the assembly for one round that combines b, c and d with XOR to
  # the global $code.  The driver calls this generator for rounds 20..39 and
  # again for rounds 60..79.
  #
  # Arguments: the round number followed by the five register names that play
  # the roles a, b, c, d, e in this round.
  #
  # Rounds below 79 are interleaved with the "forward Xupdate" of the next
  # schedule word, and rounds below 77 also emit a zapnot on the word just
  # consumed.  Round 79 has no Xupdate; instead it interleaves loads of the
  # five context words from $ctx into $X[0] .. $X[4].
  sub BODY_20_39 {
      my ($i, $a, $b, $c, $d, $e) = @_;
      my $j = $i + 1;

      # @X is used as a ring of sixteen registers.
      my $Xcur = $X[$i % 16];		# word consumed by this round
      my $Xnxt = $X[$j % 16];		# word for the following round
      my ($Xp2, $Xp8, $Xp13) = map { $X[($j + $_) % 16] } (2, 8, 13);

      if ($i == 79) {
          # with context fetch
          $code .= <<___;
sll $a,5,$t1
addl $K,$e,$e
zapnot $a,0xf,$a
ldl $X[0],0($ctx)
sll $b,30,$t3
addl $t1,$e,$e
xor $b,$c,$t2
ldl $X[1],4($ctx)
srl $b,2,$b
addl $Xcur,$e,$e
xor $d,$t2,$t2
ldl $X[2],8($ctx)
srl $a,27,$t0
addl $t2,$e,$e
ldl $X[3],12($ctx)
or $t3,$b,$b
addl $t0,$e,$e
ldl $X[4],16($ctx)
___
          return;
      }

      # with forward Xupdate
      $code .= <<___;
sll $a,5,$t1
addl $K,$e,$e
zapnot $a,0xf,$a
xor $Xp2,$Xnxt,$Xnxt
sll $b,30,$t3
addl $t1,$e,$e
xor $b,$c,$t2
xor $Xp8,$Xnxt,$Xnxt
srl $b,2,$b
addl $Xcur,$e,$e
xor $d,$t2,$t2
xor $Xp13,$Xnxt,$Xnxt
srl $Xnxt,31,$t1
addl $t2,$e,$e
srl $a,27,$t0
addl $Xnxt,$Xnxt,$Xnxt
or $t3,$b,$b
addl $t0,$e,$e
or $t1,$Xnxt,$Xnxt
___

      if ($i < 77) {
          $code .= <<___;
zapnot $Xcur,0xf,$Xcur
___
      }

      return;
  }
  # Append the assembly for one round of the third group (rounds 40..59) to
  # the global $code.
  #
  # Arguments: the round number followed by the five register names that play
  # the roles a, b, c, d, e in this round.
  #
  # The round combines (b AND c), (b AND d) and (c AND d) with OR, and is
  # interleaved with the "forward Xupdate" of the next schedule word.  The
  # same text is emitted for every round number; only the @X registers chosen
  # depend on it.
  sub BODY_40_59 {
      my ($i, $a, $b, $c, $d, $e) = @_;
      my $j = $i + 1;

      # @X is used as a ring of sixteen registers.
      my $Xcur = $X[$i % 16];		# word consumed by this round
      my $Xnxt = $X[$j % 16];		# word for the following round
      my ($Xp2, $Xp8, $Xp13) = map { $X[($j + $_) % 16] } (2, 8, 13);

      # with forward Xupdate
      $code .= <<___;
sll $a,5,$t1
addl $K,$e,$e
zapnot $a,0xf,$a
xor $Xp2,$Xnxt,$Xnxt
srl $a,27,$t0
and $b,$c,$t2
and $b,$d,$t3
xor $Xp8,$Xnxt,$Xnxt
sll $b,30,$b
addl $t1,$e,$e
xor $Xp13,$Xnxt,$Xnxt
srl $Xnxt,31,$t1
addl $t0,$e,$e
or $t2,$t3,$t2
and $c,$d,$t3
or $t2,$t3,$t2
srl $b,32,$t3
addl $Xcur,$e,$e
addl $Xnxt,$Xnxt,$Xnxt
or $t3,$b,$b
addl $t2,$e,$e
or $t1,$Xnxt,$Xnxt
zapnot $Xcur,0xf,$Xcur
___

      return;
  }
  # Assemble the complete sha1_block_data_order routine into $code.
  #
  # Prologue: reserve 64 bytes of stack, save ra, s0-s5 and fp, load the five
  # state words from the context, and turn $num into an end pointer
  # ($inp + $num * 64).  The .Lloop label starts the per-block loop, where the
  # first round constant is built with an ldah/lda pair.
  $code = <<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif
.text
.set noat
.set noreorder
.globl sha1_block_data_order
.align 5
.ent sha1_block_data_order
sha1_block_data_order:
lda sp,-64(sp)
stq ra,0(sp)
stq s0,8(sp)
stq s1,16(sp)
stq s2,24(sp)
stq s3,32(sp)
stq s4,40(sp)
stq s5,48(sp)
stq fp,56(sp)
.mask 0x0400fe00,-64
.frame sp,64,ra
.prologue 0
ldl $A,0($ctx)
ldl $B,4($ctx)
sll $num,6,$num
ldl $C,8($ctx)
ldl $D,12($ctx)
ldl $E,16($ctx)
addq $inp,$num,$num
.Lloop:
.set noreorder
ldah $K,23170(zero)
zapnot $B,0xf,$B
lda $K,31129($K) # K_00_19
___

  # Emit rounds from the current value of the global $i up to (but not
  # including) $stop using the given generator.  After every round the last
  # element of @V is moved to the front, so the register that played "e"
  # plays "a" in the next round.
  my $emit_rounds = sub {
      my ($generator, $stop) = @_;
      while ($i < $stop) {
          $generator->($i, @V);
          unshift(@V, pop(@V));
          $i++;
      }
  };

  $i = 0;
  $emit_rounds->(\&BODY_00_19, 20);

  $code .= <<___;
ldah $K,28378(zero)
lda $K,-5215($K) # K_20_39
___
  $emit_rounds->(\&BODY_20_39, 40);

  $code .= <<___;
ldah $K,-28900(zero)
lda $K,-17188($K) # K_40_59
___
  $emit_rounds->(\&BODY_40_59, 60);

  # Rounds 60..79 reuse the generator of rounds 20..39 with another constant.
  $code .= <<___;
ldah $K,-13725(zero)
lda $K,-15914($K) # K_60_79
___
  $emit_rounds->(\&BODY_20_39, 80);

  # Epilogue: add the context words (fetched into $X[0] .. $X[4] by round 79)
  # to the working variables, store them back, advance $inp by 64 and loop
  # while it is below the end pointer.  Then restore the saved registers and
  # return.
  $code .= <<___;
addl $X[0],$A,$A
addl $X[1],$B,$B
addl $X[2],$C,$C
addl $X[3],$D,$D
addl $X[4],$E,$E
stl $A,0($ctx)
stl $B,4($ctx)
addq $inp,64,$inp
stl $C,8($ctx)
stl $D,12($ctx)
stl $E,16($ctx)
cmpult $inp,$num,$t1
bne $t1,.Lloop
.set noreorder
ldq ra,0(sp)
ldq s0,8(sp)
ldq s1,16(sp)
ldq s2,24(sp)
ldq s3,32(sp)
ldq s4,40(sp)
ldq s5,48(sp)
ldq fp,56(sp)
lda sp,64(sp)
ret (ra)
.end sha1_block_data_order
.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
  # Write the generated assembly.
  #
  # The last command-line argument, when present and true, names the output
  # file and STDOUT is redirected to it; otherwise the code goes to the
  # inherited STDOUT.
  #
  # The three-argument open keeps characters in the file name from being
  # read as an open mode.  Failures of open and close are fatal, so a bad
  # path or a write error is reported rather than leaving a missing or
  # truncated file behind.
  $output = pop;
  if ($output) {
      open STDOUT, '>', $output
          or die "can't open $output for writing: $!";
  }
  print $code;
  close STDOUT or die "error closing STDOUT: $!";