rc4-s390x.pl 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. #! /usr/bin/env perl
  2. # Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # February 2009
  17. #
  18. # Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
  19. # "cluster" Address Generation Interlocks, so that one pipeline stall
  20. # resolves several dependencies.
  21. # November 2010.
  22. #
  23. # Adapt for -m31 build. If kernel supports what's called "highgprs"
  24. # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
  25. # instructions and achieve "64-bit" performance even in 31-bit legacy
  26. # application context. The feature is not specific to any particular
  27. # processor, as long as it's "z-CPU". Latter implies that the code
  28. # remains z/Architecture specific. On z990 it was measured to perform
  29. # 50% better than code generated by gcc 4.3.
  # $output is the last argument if it looks like a file (it has an extension)
  # $flavour is the first argument if it doesn't look like a file
  $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

  # A flavour matching /3[12]/ selects 4-byte stack slots and the plain
  # stm/lm register save/restore forms; anything else (including no flavour
  # at all) selects 8-byte slots and the "g"-suffixed stmg/lmg forms.
  if (defined($flavour) && $flavour =~ /3[12]/) {
      $SIZE_T=4;
      $g="";
  } else {
      $SIZE_T=8;
      $g="g";
  }

  # Redirect STDOUT to the requested output file.  The three-argument form
  # treats $output purely as a file name, and a failure is fatal so that the
  # generated code is never written to the inherited STDOUT by accident.
  if ($output) {
      open(STDOUT, ">", $output) or die "can't open $output: $!";
  }

  $rp="%r14";     # register used by the "br $rp" function returns
  $sp="%r15";     # base register of the register save area

  # Everything emitted below is appended to $code and printed at the very end.
  $code=<<___;
.text
___
  # void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
  #
  # Appends the RC4 routine to $code.  The emitted code reads one index byte
  # from offset 0 of *key and another from offset 1, and treats the bytes
  # starting at offset 2 as a byte-indexed table.  Input is consumed 8 bytes
  # per pass of .Loop8 (len >> 3 passes), and the remaining len & 7 bytes are
  # handled one at a time in .Loop1.  Both indices are written back at .Lexit.
  {
      # Register assignment; %r2-%r5 carry the four arguments in order.
      my $acc = "%r0";              # accumulates output bytes
      my $cnt = "%r1";              # number of 8-byte passes left
      my $key = "%r2";
      my $len = "%r3";
      my $inp = "%r4";
      my $out = "%r5";
      my @XX  = ("%r6", "%r7");     # current / next first index
      my @TX  = ("%r8", "%r9");     # table bytes at $XX[0] / $XX[1]
      my $YY  = "%r10";             # second index
      my $TY  = "%r11";             # table byte at $YY, then the output index

      # Entry: save %r6-%r11, which the body overwrites.
      $code .= <<___;
.globl RC4
.type RC4,\@function
.align 64
RC4:
stm${g} %r6,%r11,6*$SIZE_T($sp)
___

      # 31/32-bit flavours only: zero-extend len to 64 bits, because the
      # code below shifts and tests it with 64-bit instructions.
      if ($flavour =~ /3[12]/) {
          $code .= <<___;
llgfr $len,$len
___
      }

      # Load both indices, advance the first one modulo 256, preload the
      # table byte it selects, and skip the 8x loop when len < 8.
      $code .= <<___;
llgc $XX[0],0($key)
llgc $YY,1($key)
la $XX[0],1($XX[0])
nill $XX[0],0xff
srlg $cnt,$len,3
ltgr $cnt,$cnt
llgc $TX[0],2($XX[0],$key)
jz .Lshort
j .Loop8
.align 64
.Loop8:
___

      # The loop body is unrolled eight times at generation time.  Each step
      # advances $YY by $TX[0], swaps the two table bytes, and leaves the
      # index of this step's output byte in $TY; that byte is fetched into
      # $acc by the following step (or after the loop for the last one).
      foreach my $i (0 .. 7) {
          $code .= <<___;
la $YY,0($YY,$TX[0]) # $i
nill $YY,255
la $XX[1],1($XX[0])
nill $XX[1],255
___

          # Step 1 starts the accumulator with the previous step's byte;
          # later steps shift it left by 8 bits and insert theirs.
          if ($i == 1) {
              $code .= <<___;
llgc $acc,2($TY,$key)
___
          } elsif ($i > 1) {
              $code .= <<___;
sllg $acc,$acc,8
ic $acc,2($TY,$key)
___
          }

          # If the next index ($XX[1]) equals $YY, $TX[1] is replaced with
          # $TX[0], i.e. the value that was just stored to that table slot.
          $code .= <<___;
llgc $TY,2($YY,$key)
stc $TX[0],2($YY,$key)
llgc $TX[1],2($XX[1],$key)
stc $TY,2($XX[0],$key)
cr $XX[1],$YY
jne .Lcmov$i
la $TX[1],0($TX[0])
.Lcmov$i:
la $TY,0($TY,$TX[0])
nill $TY,255
___

          # "rotate" registers: the next step's current index/byte are the
          # ones this step prepared in slot 1.
          @TX = @TX[1, 0];
          @XX = @XX[1, 0];
      }

      # Finish the 8-byte group: insert the eighth byte, XOR with 8 input
      # bytes and store the result.  Then the byte-at-a-time tail loop and
      # the exit path, which undoes the index pre-increment before storing
      # both indices back and restoring %r6-%r11.
      $code .= <<___;
lg $TX[1],0($inp)
sllg $acc,$acc,8
la $inp,8($inp)
ic $acc,2($TY,$key)
xgr $acc,$TX[1]
stg $acc,0($out)
la $out,8($out)
brctg $cnt,.Loop8
.Lshort:
lghi $acc,7
ngr $len,$acc
jz .Lexit
j .Loop1
.align 16
.Loop1:
la $YY,0($YY,$TX[0])
nill $YY,255
llgc $TY,2($YY,$key)
stc $TX[0],2($YY,$key)
stc $TY,2($XX[0],$key)
ar $TY,$TX[0]
ahi $XX[0],1
nill $TY,255
nill $XX[0],255
llgc $acc,0($inp)
la $inp,1($inp)
llgc $TY,2($TY,$key)
llgc $TX[0],2($XX[0],$key)
xr $acc,$TY
stc $acc,0($out)
la $out,1($out)
brct $len,.Loop1
.Lexit:
ahi $XX[0],-1
stc $XX[0],0($key)
stc $YY,1($key)
lm${g} %r6,%r11,6*$SIZE_T($sp)
br $rp
.size RC4,.-RC4
.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
  }
  # void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
  #
  # Appends the RC4_set_key routine to $code.  The emitted code zeroes the
  # two bytes at offset 0 of *key, fills the 256 bytes starting at offset 2
  # with the values 0..255 (.L1stloop), and then makes one pass over those
  # 256 bytes (.L2ndloop), swapping each with the byte selected by a running
  # index that accumulates the table byte and the next input byte.  The
  # input offset restarts from 0 every len bytes.
  {
      # Register assignment; %r2-%r4 carry the three arguments in order.
      my $cnt  = "%r0";     # 256 in the first loop, then input bytes left
      my $idx  = "%r1";     # fill position, then the running swap index
      my $key  = "%r2";
      my $len  = "%r3";
      my $inp  = "%r4";
      my $acc  = "%r5";     # table byte at the current position
      my $dat  = "%r6";     # input byte, then the table byte at $idx
      my $ikey = "%r7";     # table position, counted from -256 up to 0
      my $iinp = "%r8";     # offset of the next input byte

      # .L2ndloop ends once the low 8 bits of $ikey are zero: the "tml"
      # result is still in the condition code at "jz .Ldone" because the
      # load and stores in between do not change it.
      $code .= <<___;
.globl RC4_set_key
.type RC4_set_key,\@function
.align 64
RC4_set_key:
stm${g} %r6,%r8,6*$SIZE_T($sp)
lhi $cnt,256
la $idx,0
sth $idx,0($key)
.align 4
.L1stloop:
stc $idx,2($idx,$key)
la $idx,1($idx)
brct $cnt,.L1stloop
lghi $ikey,-256
lr $cnt,$len
la $iinp,0
la $idx,0
.align 16
.L2ndloop:
llgc $acc,2+256($ikey,$key)
llgc $dat,0($iinp,$inp)
la $idx,0($idx,$acc)
la $ikey,1($ikey)
la $idx,0($idx,$dat)
nill $idx,255
la $iinp,1($iinp)
tml $ikey,255
llgc $dat,2($idx,$key)
stc $dat,2+256-1($ikey,$key)
stc $acc,2($idx,$key)
jz .Ldone
brct $cnt,.L2ndloop
lr $cnt,$len
la $iinp,0
j .L2ndloop
.Ldone:
lm${g} %r6,%r8,6*$SIZE_T($sp)
br $rp
.size RC4_set_key,.-RC4_set_key
___
  }
  # const char *RC4_options()
  #
  # Appends the RC4_options routine to $code.  It returns in %r2 the address
  # of the constant string "rc4(8x,char)", which is placed in .rodata.
  #
  # The .Loptions label is bound after the ".align 8" directive so that it
  # always names the first byte of the string; a label bound before the
  # directive would name any padding the assembler had to insert.
  $code.=<<___;
.globl RC4_options
.type RC4_options,\@function
.align 16
RC4_options:
larl %r2,.Loptions
br %r14
.size RC4_options,.-RC4_options
.section .rodata
.align 8
.Loptions:
.string "rc4(8x,char)"
___
  # Write out everything accumulated in $code in one go.
  print STDOUT $code;

  # Close explicitly to force a flush, so that a failed write is reported
  # here instead of being lost at interpreter exit.
  close(STDOUT) or die "error closing STDOUT: $!";