#! /usr/bin/env perl
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Rohan McLure <rmclure@linux.ibm.com> for the OpenSSL
# project.
# ====================================================================
#
# p384 lower-level primitives for PPC64 using vector instructions.
#
  use strict;
  use warnings;

  # Command line: <flavour> [other args ...] <output>
  # The first argument is the flavour handed on to ppc-xlate.pl.  Remaining
  # arguments are consumed until one looks like a file name with an extension;
  # if none does, "-" is passed as the output argument instead.
  my $flavour = shift;
  my $output = "";
  while (($output = shift) && ($output !~ /\w[\w\-]*\.\w+$/)) {}
  $output = "-" if !$output;

  # Directory part of this script's own path, trailing separator included.
  # Fall back to "" when $0 has no directory component, so the concatenations
  # below never see an undefined or stale $1.
  my ($xlate, $dir);
  $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

  # Look for the translator next to this script first, then in ../../perlasm/.
  ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
  ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
      die "can't locate ppc-xlate.pl";

  # Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
  # The translator and output paths are quoted so that paths containing spaces
  # survive the shell, and a failure to start the pipe is reported immediately
  # instead of being discovered only when STDOUT is closed.
  open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
      or die "can't call $xlate: $!";
  *STDOUT = *OUT;

  # Buffer that accumulates the generated assembly text.
  my $code = "";

  # Register names interpolated into the generated assembly.  $outp is the base
  # register of every result store; $sp, $savelr and $savesp are not referenced
  # by the code visible in this part of the file.
  my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

  # Vector register that each routine clears with vspltisw and then uses as the
  # initial addend.
  my $vzero = "v32";
  # startproc(NAME)
  # Append the opening directives for a global routine called NAME to $code:
  # a .globl declaration, a .align 5 directive and the NAME: label.
  sub startproc($)
  {
      my $symbol = shift;

      $code .= join("\n",
                    ".globl ${symbol}",
                    ".align 5",
                    "${symbol}:") . "\n";
  }
  # endproc(NAME)
  # Append the closing lines for routine NAME to $code: a blr instruction
  # followed by the .size directive for the symbol.
  sub endproc($)
  {
      my $symbol = shift;

      $code .= "blr\n"
             . ".size ${symbol},.-${symbol}\n";
  }
  # load_vrs(POINTER, REG_LIST)
  # Append seven lxsd instructions to $code.  Instruction i (0..6) names the
  # register REG_LIST->[i] and the address 8*i(POINTER).
  #   POINTER  - name of the base register holding the source address
  #   REG_LIST - array reference with at least seven register names
  sub load_vrs($$)
  {
      my ($base, $vregs) = @_;

      $code .= join "",
          map { "lxsd $vregs->[$_]," . 8 * $_ . "($base)\n" } 0 .. 6;
  }
  # store_vrs(POINTER, REG_LIST)
  # Append thirteen stxv instructions to $code.  Instruction i (0..12) names
  # the register REG_LIST->[i] and the address 16*i(POINTER).
  #   POINTER  - name of the base register holding the destination address
  #   REG_LIST - array reference with at least thirteen register names
  # Nothing in the part of the file visible here calls this helper.
  sub store_vrs($$)
  {
      my ($base, $vregs) = @_;

      $code .= join "",
          map { "stxv $vregs->[$_]," . 16 * $_ . "($base)\n" } 0 .. 12;
  }
  # Directives placed ahead of all generated routines.
  $code .= join("\n", '.machine "any"', '.text') . "\n";
  {
      # mul/square common
      #
      # Register names shared by both generators.  $t1..$t4 are scratch vector
      # registers, $zero/$one are GPRs used only by the square routine, and
      # $out receives each result before it is stored with stxv.
      #
      # Both routines store thirteen results at 16-byte steps from $outp
      # (offsets 0..192).  The "slot" comments below read vmsumudm as "sum of
      # the two doubleword products of the first two operands, plus the third
      # operand" and xxpermdi ...,0b00 as "pair up the first doubleword of each
      # source" - NOTE(review): taken from the Power ISA description of these
      # instructions, confirm there before relying on the comments.
      my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43");
      my ($zero, $one) = ("r8", "r9");
      my $out = "v51";

      {
          #
          # p384_felem_mul
          #
          # Operands are read through r4 and r5, seven 8-byte limbs each.
          my ($in1p, $in2p) = ("r4", "r5");
          my @in1 = map { "v$_" } 44 .. 50;
          my @in2 = map { "v$_" } 35 .. 41;

          startproc("p384_felem_mul");

          $code .= "vspltisw $vzero,0\n";
          load_vrs($in1p, \@in1);
          load_vrs($in2p, \@in2);

          # slot 0: in1[0]*in2[0]
          $code .= <<___;
vmsumudm $out,$in1[0],$in2[0],$vzero
stxv $out,0($outp)
___

          # slot 1: in1[0]*in2[1] + in1[1]*in2[0]
          $code .= <<___;
xxpermdi $t1,$in1[0],$in1[1],0b00
xxpermdi $t2,$in2[1],$in2[0],0b00
vmsumudm $out,$t1,$t2,$vzero
stxv $out,16($outp)
___

          # slot 2: in1[0..1] against in2[2..1], plus in1[2]*in2[0]
          $code .= <<___;
xxpermdi $t2,$in2[2],$in2[1],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$in1[2],$in2[0],$out
stxv $out,32($outp)
___

          # slot 3: in1[0..1] against in2[3..2], in1[2..3] against in2[1..0]
          $code .= <<___;
xxpermdi $t2,$in2[1],$in2[0],0b00
xxpermdi $t3,$in1[2],$in1[3],0b00
xxpermdi $t4,$in2[3],$in2[2],0b00
vmsumudm $out,$t1,$t4,$vzero
vmsumudm $out,$t3,$t2,$out
stxv $out,48($outp)
___

          # slot 4: in1[0..1] x in2[4..3], in1[2..3] x in2[2..1], in1[4]*in2[0]
          $code .= <<___;
xxpermdi $t2,$in2[4],$in2[3],0b00
xxpermdi $t4,$in2[2],$in2[1],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$t3,$t4,$out
vmsumudm $out,$in1[4],$in2[0],$out
stxv $out,64($outp)
___

          # slot 5: in1[0..1] x in2[5..4], in1[2..3] x in2[3..2],
          #         in1[4..5] x in2[1..0]  ($t1 is repurposed for in1[4..5])
          $code .= <<___;
xxpermdi $t2,$in2[5],$in2[4],0b00
xxpermdi $t4,$in2[3],$in2[2],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$t3,$t4,$out
xxpermdi $t4,$in2[1],$in2[0],0b00
xxpermdi $t1,$in1[4],$in1[5],0b00
vmsumudm $out,$t1,$t4,$out
stxv $out,80($outp)
___

          # slot 6: in1[0..1] x in2[6..5], in1[2..3] x in2[4..3],
          #         in1[4..5] x in2[2..1], in1[6]*in2[0]
          $code .= <<___;
xxpermdi $t1,$in1[0],$in1[1],0b00
xxpermdi $t2,$in2[6],$in2[5],0b00
xxpermdi $t4,$in2[4],$in2[3],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$t3,$t4,$out
xxpermdi $t2,$in2[2],$in2[1],0b00
xxpermdi $t1,$in1[4],$in1[5],0b00
vmsumudm $out,$t1,$t2,$out
vmsumudm $out,$in1[6],$in2[0],$out
stxv $out,96($outp)
___

          # slot 7: in1[1..2] x in2[6..5], in1[3..4] x in2[4..3] (the $t4 left
          #         over from slot 6), in1[5..6] x in2[2..1]
          $code .= <<___;
xxpermdi $t1,$in1[1],$in1[2],0b00
xxpermdi $t2,$in2[6],$in2[5],0b00
xxpermdi $t3,$in1[3],$in1[4],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$t3,$t4,$out
xxpermdi $t3,$in2[2],$in2[1],0b00
xxpermdi $t1,$in1[5],$in1[6],0b00
vmsumudm $out,$t1,$t3,$out
stxv $out,112($outp)
___

          # slot 8: in1[2..3] x in2[6..5], in1[4..5] x in2[4..3], in1[6]*in2[2]
          #         ($t2 and $t4 still hold in2[6..5] and in2[4..3])
          $code .= <<___;
xxpermdi $t1,$in1[2],$in1[3],0b00
xxpermdi $t3,$in1[4],$in1[5],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$t3,$t4,$out
vmsumudm $out,$in1[6],$in2[2],$out
stxv $out,128($outp)
___

          # slot 9: in1[3..4] x in2[6..5], in1[5..6] x in2[4..3]
          $code .= <<___;
xxpermdi $t1,$in1[3],$in1[4],0b00
vmsumudm $out,$t1,$t2,$vzero
xxpermdi $t1,$in1[5],$in1[6],0b00
vmsumudm $out,$t1,$t4,$out
stxv $out,144($outp)
___

          # slot 10: in1[4..5] x in2[6..5] ($t3 from slot 8), in1[6]*in2[4]
          $code .= <<___;
vmsumudm $out,$t3,$t2,$vzero
vmsumudm $out,$in1[6],$in2[4],$out
stxv $out,160($outp)
___

          # slot 11: in1[5..6] x in2[6..5] ($t1 from slot 9)
          $code .= <<___;
vmsumudm $out,$t1,$t2,$vzero
stxv $out,176($outp)
___

          # slot 12: in1[6]*in2[6]
          $code .= <<___;
vmsumudm $out,$in1[6],$in2[6],$vzero
stxv $out,192($outp)
___

          endproc("p384_felem_mul");
      }

      {
          #
          # p384_felem_square
          #
          # The operand is read through r4, seven 8-byte limbs.
          my ($inp) = ("r4");
          my @in = map { "v$_" } 44 .. 50;
          my @inx2 = map { "v$_" } 35 .. 41;

          startproc("p384_felem_square");

          $code .= "vspltisw $vzero,0\n";
          load_vrs($inp, \@in);

          # Build the shift-count vector in $t1 from GPRs holding 1 and 0.
          $code .= <<___;
li $zero,0
li $one,1
mtvsrdd $t1,$one,$zero
___

          # inx2[i] = in[i] shifted left by the counts in $t1 - presumably the
          # limb doubled, so each cross product needs only one multiply
          # (confirm against the Power ISA definitions of mtvsrdd and vsld).
          $code .= "vsld $inx2[$_],$in[$_],$t1\n" for 0 .. 6;

          # slot 0: in[0]*in[0]
          $code .= <<___;
vmsumudm $out,$in[0],$in[0],$vzero
stxv $out,0($outp)
___

          # slot 1: in[0]*inx2[1]
          $code .= <<___;
vmsumudm $out,$in[0],$inx2[1],$vzero
stxv $out,16($outp)
___

          # slot 2: in[0]*inx2[2] + in[1]*in[1]
          $code .= <<___;
vmsumudm $out,$in[0],$inx2[2],$vzero
vmsumudm $out,$in[1],$in[1],$out
stxv $out,32($outp)
___

          # slot 3: in[0..1] against inx2[3..2]
          $code .= <<___;
xxpermdi $t1,$in[0],$in[1],0b00
xxpermdi $t2,$inx2[3],$inx2[2],0b00
vmsumudm $out,$t1,$t2,$vzero
stxv $out,48($outp)
___

          # slot 4: in[0..1] against inx2[4..3], plus in[2]*in[2]
          $code .= <<___;
xxpermdi $t4,$inx2[4],$inx2[3],0b00
vmsumudm $out,$t1,$t4,$vzero
vmsumudm $out,$in[2],$in[2],$out
stxv $out,64($outp)
___

          # slot 5: in[0..1] against inx2[5..4], plus in[2]*inx2[3]
          $code .= <<___;
xxpermdi $t2,$inx2[5],$inx2[4],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$in[2],$inx2[3],$out
stxv $out,80($outp)
___

          # slot 6: in[0..1] against inx2[6..5], in[2]*inx2[4], in[3]*in[3]
          $code .= <<___;
xxpermdi $t2,$inx2[6],$inx2[5],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$in[2],$inx2[4],$out
vmsumudm $out,$in[3],$in[3],$out
stxv $out,96($outp)
___

          # slot 7: in[1..2] against inx2[6..5], plus in[3]*inx2[4]
          $code .= <<___;
xxpermdi $t3,$in[1],$in[2],0b00
vmsumudm $out,$t3,$t2,$vzero
vmsumudm $out,$in[3],$inx2[4],$out
stxv $out,112($outp)
___

          # slot 8: in[2..3] against inx2[6..5], plus in[4]*in[4]
          $code .= <<___;
xxpermdi $t1,$in[2],$in[3],0b00
vmsumudm $out,$t1,$t2,$vzero
vmsumudm $out,$in[4],$in[4],$out
stxv $out,128($outp)
___

          # slot 9: in[3..4] against inx2[6..5]
          $code .= <<___;
xxpermdi $t1,$in[3],$in[4],0b00
vmsumudm $out,$t1,$t2,$vzero
stxv $out,144($outp)
___

          # slot 10: in[4]*inx2[6] + in[5]*in[5]
          $code .= <<___;
vmsumudm $out,$in[4],$inx2[6],$vzero
vmsumudm $out,$in[5],$in[5],$out
stxv $out,160($outp)
___

          # slot 11: in[5]*inx2[6]
          $code .= <<___;
vmsumudm $out,$in[5],$inx2[6],$vzero
stxv $out,176($outp)
___

          # slot 12: in[6]*in[6]
          $code .= <<___;
vmsumudm $out,$in[6],$in[6],$vzero
stxv $out,192($outp)
___

          endproc("p384_felem_square");
      }
  }
  # Replace every `...` span in the generated text with the result of
  # evaluating its contents as Perl, then emit the finished text on STDOUT.
  $code =~ s/\`([^\`]*)\`/eval $1/gem;

  print($code);

  # Write errors on a buffered handle only surface when it is closed, so the
  # close result is checked.
  close(STDOUT) or die "error closing STDOUT: $!";