sparcv9-gf2m.pl 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #! /usr/bin/env perl
  2. # Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # October 2012
  17. #
# The module implements the bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's a kind of low-hanging, mechanical port from C
# for the time being... except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. The former
# delivers ~25-45% more performance (more for longer keys) on the
# heaviest DH and DSA verify operations on the venerable UltraSPARC II.
# On T4, the VIS3 code is ~100-230% faster than gcc-generated code and
# ~35-90% faster than the pure SPARCv9 code path.
  26. $output = pop and open STDOUT,">$output";
  27. $locals=16*8;
  28. $tab="%l0";
  29. @T=("%g2","%g3");
  30. @i=("%g4","%g5");
  31. ($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
  32. ($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
  33. $code.=<<___;
  34. #ifndef __ASSEMBLER__
  35. # define __ASSEMBLER__ 1
  36. #endif
  37. #include "crypto/sparc_arch.h"
  38. #ifdef __arch64__
  39. .register %g2,#scratch
  40. .register %g3,#scratch
  41. #endif
  42. #ifdef __PIC__
  43. SPARC_PIC_THUNK(%g1)
  44. #endif
  45. .globl bn_GF2m_mul_2x2
  46. .align 16
  47. bn_GF2m_mul_2x2:
  48. SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
  49. ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0]
  50. andcc %g1, SPARCV9_VIS3, %g0
  51. bz,pn %icc,.Lsoftware
  52. nop
  53. sllx %o1, 32, %o1
  54. sllx %o3, 32, %o3
  55. or %o2, %o1, %o1
  56. or %o4, %o3, %o3
  57. .word 0x95b262ab ! xmulx %o1, %o3, %o2
  58. .word 0x99b262cb ! xmulxhi %o1, %o3, %o4
  59. srlx %o2, 32, %o1 ! 13 cycles later
  60. st %o2, [%o0+0]
  61. st %o1, [%o0+4]
  62. srlx %o4, 32, %o3
  63. st %o4, [%o0+8]
  64. retl
  65. st %o3, [%o0+12]
  66. .align 16
  67. .Lsoftware:
  68. save %sp,-STACK_FRAME-$locals,%sp
  69. sllx %i1,32,$a
  70. mov -1,$a12
  71. sllx %i3,32,$b
  72. or %i2,$a,$a
  73. srlx $a12,1,$a48 ! 0x7fff...
  74. or %i4,$b,$b
  75. srlx $a12,2,$a12 ! 0x3fff...
  76. add %sp,STACK_BIAS+STACK_FRAME,$tab
  77. sllx $a,2,$a4
  78. mov $a,$a1
  79. sllx $a,1,$a2
  80. srax $a4,63,@i[1] ! broadcast 61st bit
  81. and $a48,$a4,$a4 ! (a<<2)&0x7fff...
  82. srlx $a48,2,$a48
  83. srax $a2,63,@i[0] ! broadcast 62nd bit
  84. and $a12,$a2,$a2 ! (a<<1)&0x3fff...
  85. srax $a1,63,$lo ! broadcast 63rd bit
  86. and $a48,$a1,$a1 ! (a<<0)&0x1fff...
  87. sllx $a1,3,$a8
  88. and $b,$lo,$lo
  89. and $b,@i[0],@i[0]
  90. and $b,@i[1],@i[1]
  91. stx %g0,[$tab+0*8] ! tab[0]=0
  92. xor $a1,$a2,$a12
  93. stx $a1,[$tab+1*8] ! tab[1]=a1
  94. stx $a2,[$tab+2*8] ! tab[2]=a2
  95. xor $a4,$a8,$a48
  96. stx $a12,[$tab+3*8] ! tab[3]=a1^a2
  97. xor $a4,$a1,$a1
  98. stx $a4,[$tab+4*8] ! tab[4]=a4
  99. xor $a4,$a2,$a2
  100. stx $a1,[$tab+5*8] ! tab[5]=a1^a4
  101. xor $a4,$a12,$a12
  102. stx $a2,[$tab+6*8] ! tab[6]=a2^a4
  103. xor $a48,$a1,$a1
  104. stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4
  105. xor $a48,$a2,$a2
  106. stx $a8,[$tab+8*8] ! tab[8]=a8
  107. xor $a48,$a12,$a12
  108. stx $a1,[$tab+9*8] ! tab[9]=a1^a8
  109. xor $a4,$a1,$a1
  110. stx $a2,[$tab+10*8] ! tab[10]=a2^a8
  111. xor $a4,$a2,$a2
  112. stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8
  113. xor $a4,$a12,$a12
  114. stx $a48,[$tab+12*8] ! tab[12]=a4^a8
  115. srlx $lo,1,$hi
  116. stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8
  117. sllx $lo,63,$lo
  118. stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8
  119. srlx @i[0],2,@T[0]
  120. stx $a12,[$tab+15*8] ! tab[15]=a1^a2^a4^a8
  121. sllx @i[0],62,$a1
  122. sllx $b,3,@i[0]
  123. srlx @i[1],3,@T[1]
  124. and @i[0],`0xf<<3`,@i[0]
  125. sllx @i[1],61,$a2
  126. ldx [$tab+@i[0]],@i[0]
  127. srlx $b,4-3,@i[1]
  128. xor @T[0],$hi,$hi
  129. and @i[1],`0xf<<3`,@i[1]
  130. xor $a1,$lo,$lo
  131. ldx [$tab+@i[1]],@i[1]
  132. xor @T[1],$hi,$hi
  133. xor @i[0],$lo,$lo
  134. srlx $b,8-3,@i[0]
  135. xor $a2,$lo,$lo
  136. and @i[0],`0xf<<3`,@i[0]
  137. ___
  138. for($n=1;$n<14;$n++) {
  139. $code.=<<___;
  140. sllx @i[1],`$n*4`,@T[0]
  141. ldx [$tab+@i[0]],@i[0]
  142. srlx @i[1],`64-$n*4`,@T[1]
  143. xor @T[0],$lo,$lo
  144. srlx $b,`($n+2)*4`-3,@i[1]
  145. xor @T[1],$hi,$hi
  146. and @i[1],`0xf<<3`,@i[1]
  147. ___
  148. push(@i,shift(@i)); push(@T,shift(@T));
  149. }
  150. $code.=<<___;
  151. sllx @i[1],`$n*4`,@T[0]
  152. ldx [$tab+@i[0]],@i[0]
  153. srlx @i[1],`64-$n*4`,@T[1]
  154. xor @T[0],$lo,$lo
  155. sllx @i[0],`($n+1)*4`,@T[0]
  156. xor @T[1],$hi,$hi
  157. srlx @i[0],`64-($n+1)*4`,@T[1]
  158. xor @T[0],$lo,$lo
  159. xor @T[1],$hi,$hi
  160. srlx $lo,32,%i1
  161. st $lo,[%i0+0]
  162. st %i1,[%i0+4]
  163. srlx $hi,32,%i2
  164. st $hi,[%i0+8]
  165. st %i2,[%i0+12]
  166. ret
  167. restore
  168. .type bn_GF2m_mul_2x2,#function
  169. .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
  170. .asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
  171. .align 4
  172. ___
  173. $code =~ s/\`([^\`]*)\`/eval($1)/gem;
  174. print $code;
  175. close STDOUT or die "error closing STDOUT: $!";