keccak1600-s390x.pl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. #!/usr/bin/env perl
  2. # Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the OpenSSL license (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # Keccak-1600 for s390x.
  17. #
  18. # June 2017.
  19. #
  20. # The code below is a [lane complementing] KECCAK_2X implementation (see
  21. # sha/keccak1600.c) with C[5] and D[5] held in the register bank. Though
  22. # instead of actually unrolling the loop pair-wise I simply flip
  23. # pointers to T[][] and A[][] at the end of each round. Since the number
  24. # of rounds is even, the last round writes to A[][] and everything works
  25. # out. In a nutshell it's a transliteration of the x86_64 module, because
  26. # both architectures have similar capabilities/limitations. Performance
  27. # measurement is problematic as I don't have access to an idle system.
  28. # It looks like z13 processes one byte [out of a long message] in ~14
  29. # cycles. At least the result is consistent with an estimate based on
  30. # the number of instructions and the assumed instruction issue rate. It's
  31. # ~2.5x faster than compiler-generated code.
  # Command line: the first argument is the flavour; the output file is the
  # first later argument that looks like "name.ext".
  $flavour = shift;

  # Flavours matching /3[12]/ get 4-byte pointers and the plain mnemonics;
  # everything else gets 8-byte pointers and the "g" forms.  $g is spliced
  # into pointer-sized instructions in the heredocs below (st${g}, lm${g}, ...).
  if ($flavour =~ /3[12]/) {
      $SIZE_T = 4;
      $g      = "";
  } else {
      $SIZE_T = 8;
      $g      = "g";
  }

  # Discard arguments until one looks like a file name or the list runs out.
  while (($output = shift) && ($output !~ /\w[\w\-]*\.\w+$/)) {}

  # Redirect STDOUT only when an output file was actually named, and stop
  # right away if it cannot be created: an unchecked failure here would let
  # the script exit successfully without having written the requested file.
  # The three-argument form keeps odd characters in the name from being
  # interpreted as an open mode.
  if (defined($output) && length($output)) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }
  # Byte offset of every 64-bit lane: $A[$y][$x] == 8*(5*$y + $x).
  my @A = map { my $row = 5*$_; [ map { 8*($row + $_) } 0 .. 4 ] } 0 .. 4;

  # Register assignment.  @C and @D are five-register working sets, @T two
  # temporaries; $src/$dst/$iotas are pointer registers and $sp is the
  # stack pointer.
  my @C = map { '%r' . $_ } (0, 1, 5, 6, 7);
  my @D = map { '%r' . $_ } (8 .. 12);
  my @T = ('%r13', '%r14');
  my ($src, $dst, $iotas) = ('%r2', '%r3', '%r4');
  my $sp = '%r15';

  # Stack layout: $stdframe is 16 pointer-sized slots plus 4*8 bytes
  # (presumably the ABI save area -- confirm against the s390x ELF ABI);
  # $frame adds 25*8 bytes, i.e. room for 25 64-bit lanes.
  $stdframe = 16*$SIZE_T + 4*8;
  $frame    = $stdframe + 25*8;

  # Per-lane left-rotation counts, indexed the same way as @A.
  my @rhotates = (
      [  0,  1, 62, 28, 27 ],
      [ 36, 44,  6, 55, 20 ],
      [  3, 10, 43, 25, 39 ],
      [ 41, 45, 15, 21,  8 ],
      [ 18,  2, 61, 56, 14 ],
  );
  # Emit __KeccakF1600, the internal permutation routine that the entry
  # points below reach with "bras %r14,...".  Register roles come from the
  # tables above: $src and $dst address the input and output lane arrays,
  # $iotas walks the round-constant table, and @C/@D/@T are working
  # registers.  Each pass through .Loop is one round: lanes are read through
  # $src and rows 0-3 are stored through $dst; the two pointers are then
  # exchanged (the three xgr instructions), which is why row 4 is stored
  # through $src.  The loop ends once the low eight bits of $iotas are zero
  # (tmll/jnz).  %r14 doubles as @T[1], so the return address is parked at
  # $SIZE_T*14($sp) on entry and reloaded before the final br.  At the loop
  # top @C[0..4] are expected to hold A[4][0..4]; the two lgr instructions
  # marked "harmonize with the loop top" re-establish that for the next pass.
  # Everything inside the heredocs is emitted verbatim, including the '#'
  # annotations, so instruction order there must not be disturbed.
  55. { my @C = @C; # copy, because we mess them up...
  56. my @D = @D;
  57. $code.=<<___;
  58. .text
  59. .type __KeccakF1600,\@function
  60. .align 32
  61. __KeccakF1600:
  62. st${g} %r14,$SIZE_T*14($sp)
  63. lg @C[0],$A[4][0]($src)
  64. lg @C[1],$A[4][1]($src)
  65. lg @C[2],$A[4][2]($src)
  66. lg @C[3],$A[4][3]($src)
  67. lg @C[4],$A[4][4]($src)
  68. larl $iotas,iotas
  69. j .Loop
  70. .align 16
  71. .Loop:
  72. lg @D[0],$A[0][0]($src)
  73. lg @D[1],$A[1][1]($src)
  74. lg @D[2],$A[2][2]($src)
  75. lg @D[3],$A[3][3]($src)
  76. xgr @C[0],@D[0]
  77. xg @C[1],$A[0][1]($src)
  78. xg @C[2],$A[0][2]($src)
  79. xg @C[3],$A[0][3]($src)
  80. lgr @D[4],@C[4]
  81. xg @C[4],$A[0][4]($src)
  82. xg @C[0],$A[1][0]($src)
  83. xgr @C[1],@D[1]
  84. xg @C[2],$A[1][2]($src)
  85. xg @C[3],$A[1][3]($src)
  86. xg @C[4],$A[1][4]($src)
  87. xg @C[0],$A[2][0]($src)
  88. xg @C[1],$A[2][1]($src)
  89. xgr @C[2],@D[2]
  90. xg @C[3],$A[2][3]($src)
  91. xg @C[4],$A[2][4]($src)
  92. xg @C[0],$A[3][0]($src)
  93. xg @C[1],$A[3][1]($src)
  94. xg @C[2],$A[3][2]($src)
  95. xgr @C[3],@D[3]
  96. xg @C[4],$A[3][4]($src)
  97. lgr @T[0],@C[2]
  98. rllg @C[2],@C[2],1
  99. xgr @C[2],@C[0] # D[1] = ROL64(C[2], 1) ^ C[0]
  100. rllg @C[0],@C[0],1
  101. xgr @C[0],@C[3] # D[4] = ROL64(C[0], 1) ^ C[3]
  102. rllg @C[3],@C[3],1
  103. xgr @C[3],@C[1] # D[2] = ROL64(C[3], 1) ^ C[1]
  104. rllg @C[1],@C[1],1
  105. xgr @C[1],@C[4] # D[0] = ROL64(C[1], 1) ^ C[4]
  106. rllg @C[4],@C[4],1
  107. xgr @C[4],@T[0] # D[3] = ROL64(C[4], 1) ^ C[2]
  108. ___
  # The five D[] values were just computed in place in the old @C registers
  # (see the D[n] notes above), so rename instead of moving data: @D now
  # names those registers in D[0..4] order, and @C names the old @D
  # registers, which hold the lanes A[0][0], A[1][1], A[2][2], A[3][3] and
  # the copy of the fifth lane taken by "lgr @D[4],@C[4]".
  109. (@D[0..4], @C) = (@C[1..4,0], @D);
  110. $code.=<<___;
  111. xgr @C[1],@D[1]
  112. xgr @C[2],@D[2]
  113. xgr @C[3],@D[3]
  114. rllg @C[1],@C[1],$rhotates[1][1]
  115. xgr @C[4],@D[4]
  116. rllg @C[2],@C[2],$rhotates[2][2]
  117. xgr @C[0],@D[0]
  118. lgr @T[0],@C[1]
  119. ogr @C[1],@C[2]
  120. rllg @C[3],@C[3],$rhotates[3][3]
  121. xgr @C[1],@C[0] # C[0] ^ ( C[1] | C[2])
  122. rllg @C[4],@C[4],$rhotates[4][4]
  123. xg @C[1],0($iotas)
  124. la $iotas,8($iotas)
  125. stg @C[1],$A[0][0]($dst) # R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i]
  126. lgr @T[1],@C[4]
  127. ngr @C[4],@C[3]
  128. lghi @C[1],-1 # no 'not' instruction :-(
  129. xgr @C[4],@C[2] # C[2] ^ ( C[4] & C[3])
  130. xgr @C[2],@C[1] # not @C[2]
  131. stg @C[4],$A[0][2]($dst) # R[0][2] = C[2] ^ ( C[4] & C[3])
  132. ogr @C[2],@C[3]
  133. xgr @C[2],@T[0] # C[1] ^ (~C[2] | C[3])
  134. ngr @T[0],@C[0]
  135. stg @C[2],$A[0][1]($dst) # R[0][1] = C[1] ^ (~C[2] | C[3])
  136. xgr @T[0],@T[1] # C[4] ^ ( C[1] & C[0])
  137. ogr @T[1],@C[0]
  138. stg @T[0],$A[0][4]($dst) # R[0][4] = C[4] ^ ( C[1] & C[0])
  139. xgr @T[1],@C[3] # C[3] ^ ( C[4] | C[0])
  140. stg @T[1],$A[0][3]($dst) # R[0][3] = C[3] ^ ( C[4] | C[0])
  141. lg @C[0],$A[0][3]($src)
  142. lg @C[4],$A[4][2]($src)
  143. lg @C[3],$A[3][1]($src)
  144. lg @C[1],$A[1][4]($src)
  145. lg @C[2],$A[2][0]($src)
  146. xgr @C[0],@D[3]
  147. xgr @C[4],@D[2]
  148. rllg @C[0],@C[0],$rhotates[0][3]
  149. xgr @C[3],@D[1]
  150. rllg @C[4],@C[4],$rhotates[4][2]
  151. xgr @C[1],@D[4]
  152. rllg @C[3],@C[3],$rhotates[3][1]
  153. xgr @C[2],@D[0]
  154. lgr @T[0],@C[0]
  155. ogr @C[0],@C[4]
  156. rllg @C[1],@C[1],$rhotates[1][4]
  157. xgr @C[0],@C[3] # C[3] ^ (C[0] | C[4])
  158. rllg @C[2],@C[2],$rhotates[2][0]
  159. stg @C[0],$A[1][3]($dst) # R[1][3] = C[3] ^ (C[0] | C[4])
  160. lgr @T[1],@C[1]
  161. ngr @C[1],@T[0]
  162. lghi @C[0],-1 # no 'not' instruction :-(
  163. xgr @C[1],@C[4] # C[4] ^ (C[1] & C[0])
  164. xgr @C[4],@C[0] # not @C[4]
  165. stg @C[1],$A[1][4]($dst) # R[1][4] = C[4] ^ (C[1] & C[0])
  166. ogr @C[4],@C[3]
  167. xgr @C[4],@C[2] # C[2] ^ (~C[4] | C[3])
  168. ngr @C[3],@C[2]
  169. stg @C[4],$A[1][2]($dst) # R[1][2] = C[2] ^ (~C[4] | C[3])
  170. xgr @C[3],@T[1] # C[1] ^ (C[3] & C[2])
  171. ogr @T[1],@C[2]
  172. stg @C[3],$A[1][1]($dst) # R[1][1] = C[1] ^ (C[3] & C[2])
  173. xgr @T[1],@T[0] # C[0] ^ (C[1] | C[2])
  174. stg @T[1],$A[1][0]($dst) # R[1][0] = C[0] ^ (C[1] | C[2])
  175. lg @C[2],$A[2][3]($src)
  176. lg @C[3],$A[3][4]($src)
  177. lg @C[1],$A[1][2]($src)
  178. lg @C[4],$A[4][0]($src)
  179. lg @C[0],$A[0][1]($src)
  180. xgr @C[2],@D[3]
  181. xgr @C[3],@D[4]
  182. rllg @C[2],@C[2],$rhotates[2][3]
  183. xgr @C[1],@D[2]
  184. rllg @C[3],@C[3],$rhotates[3][4]
  185. xgr @C[4],@D[0]
  186. rllg @C[1],@C[1],$rhotates[1][2]
  187. xgr @C[0],@D[1]
  188. lgr @T[0],@C[2]
  189. ngr @C[2],@C[3]
  190. rllg @C[4],@C[4],$rhotates[4][0]
  191. xgr @C[2],@C[1] # C[1] ^ ( C[2] & C[3])
  192. lghi @T[1],-1 # no 'not' instruction :-(
  193. stg @C[2],$A[2][1]($dst) # R[2][1] = C[1] ^ ( C[2] & C[3])
  194. xgr @C[3],@T[1] # not @C[3]
  195. lgr @T[1],@C[4]
  196. ngr @C[4],@C[3]
  197. rllg @C[0],@C[0],$rhotates[0][1]
  198. xgr @C[4],@T[0] # C[2] ^ ( C[4] & ~C[3])
  199. ogr @T[0],@C[1]
  200. stg @C[4],$A[2][2]($dst) # R[2][2] = C[2] ^ ( C[4] & ~C[3])
  201. xgr @T[0],@C[0] # C[0] ^ ( C[2] | C[1])
  202. ngr @C[1],@C[0]
  203. stg @T[0],$A[2][0]($dst) # R[2][0] = C[0] ^ ( C[2] | C[1])
  204. xgr @C[1],@T[1] # C[4] ^ ( C[1] & C[0])
  205. ogr @C[0],@T[1]
  206. stg @C[1],$A[2][4]($dst) # R[2][4] = C[4] ^ ( C[1] & C[0])
  207. xgr @C[0],@C[3] # ~C[3] ^ ( C[0] | C[4])
  208. stg @C[0],$A[2][3]($dst) # R[2][3] = ~C[3] ^ ( C[0] | C[4])
  209. lg @C[2],$A[2][1]($src)
  210. lg @C[3],$A[3][2]($src)
  211. lg @C[1],$A[1][0]($src)
  212. lg @C[4],$A[4][3]($src)
  213. lg @C[0],$A[0][4]($src)
  214. xgr @C[2],@D[1]
  215. xgr @C[3],@D[2]
  216. rllg @C[2],@C[2],$rhotates[2][1]
  217. xgr @C[1],@D[0]
  218. rllg @C[3],@C[3],$rhotates[3][2]
  219. xgr @C[4],@D[3]
  220. rllg @C[1],@C[1],$rhotates[1][0]
  221. xgr @C[0],@D[4]
  222. rllg @C[4],@C[4],$rhotates[4][3]
  223. lgr @T[0],@C[2]
  224. ogr @C[2],@C[3]
  225. lghi @T[1],-1 # no 'not' instruction :-(
  226. xgr @C[2],@C[1] # C[1] ^ ( C[2] | C[3])
  227. xgr @C[3],@T[1] # not @C[3]
  228. stg @C[2],$A[3][1]($dst) # R[3][1] = C[1] ^ ( C[2] | C[3])
  229. lgr @T[1],@C[4]
  230. ogr @C[4],@C[3]
  231. rllg @C[0],@C[0],$rhotates[0][4]
  232. xgr @C[4],@T[0] # C[2] ^ ( C[4] | ~C[3])
  233. ngr @T[0],@C[1]
  234. stg @C[4],$A[3][2]($dst) # R[3][2] = C[2] ^ ( C[4] | ~C[3])
  235. xgr @T[0],@C[0] # C[0] ^ ( C[2] & C[1])
  236. ogr @C[1],@C[0]
  237. stg @T[0],$A[3][0]($dst) # R[3][0] = C[0] ^ ( C[2] & C[1])
  238. xgr @C[1],@T[1] # C[4] ^ ( C[1] | C[0])
  239. ngr @C[0],@T[1]
  240. stg @C[1],$A[3][4]($dst) # R[3][4] = C[4] ^ ( C[1] | C[0])
  241. xgr @C[0],@C[3] # ~C[3] ^ ( C[0] & C[4])
  242. stg @C[0],$A[3][3]($dst) # R[3][3] = ~C[3] ^ ( C[0] & C[4])
  243. xg @D[2],$A[0][2]($src)
  244. xg @D[3],$A[1][3]($src)
  245. xg @D[1],$A[4][1]($src)
  246. xg @D[4],$A[2][4]($src)
  247. xgr $dst,$src # xchg $dst,$src
  248. rllg @D[2],@D[2],$rhotates[0][2]
  249. xg @D[0],$A[3][0]($src)
  250. rllg @D[3],@D[3],$rhotates[1][3]
  251. xgr $src,$dst
  252. rllg @D[1],@D[1],$rhotates[4][1]
  253. xgr $dst,$src
  254. rllg @D[4],@D[4],$rhotates[2][4]
  255. ___
  # Last row: its five inputs were built up in the @D registers by the
  # xg/rllg sequence above, so rename them to @C[0..4] for the combining
  # step below rather than copying.  @D[0] (now @C[3]) still needs its
  # rotation, which is issued a few instructions into the next heredoc.
  256. @C = @D[2..4,0,1];
  257. $code.=<<___;
  258. lgr @T[0],@C[0]
  259. ngr @C[0],@C[1]
  260. lghi @T[1],-1 # no 'not' instruction :-(
  261. xgr @C[0],@C[4] # C[4] ^ ( C[0] & C[1])
  262. xgr @C[1],@T[1] # not @C[1]
  263. stg @C[0],$A[4][4]($src) # R[4][4] = C[4] ^ ( C[0] & C[1])
  264. lgr @T[1],@C[2]
  265. ngr @C[2],@C[1]
  266. rllg @D[0],@D[0],$rhotates[3][0]
  267. xgr @C[2],@T[0] # C[0] ^ ( C[2] & ~C[1])
  268. ogr @T[0],@C[4]
  269. stg @C[2],$A[4][0]($src) # R[4][0] = C[0] ^ ( C[2] & ~C[1])
  270. xgr @T[0],@C[3] # C[3] ^ ( C[0] | C[4])
  271. ngr @C[4],@C[3]
  272. stg @T[0],$A[4][3]($src) # R[4][3] = C[3] ^ ( C[0] | C[4])
  273. xgr @C[4],@T[1] # C[2] ^ ( C[4] & C[3])
  274. ogr @C[3],@T[1]
  275. stg @C[4],$A[4][2]($src) # R[4][2] = C[2] ^ ( C[4] & C[3])
  276. xgr @C[3],@C[1] # ~C[1] ^ ( C[2] | C[3])
  277. lgr @C[1],@C[0] # harmonize with the loop top
  278. lgr @C[0],@T[0]
  279. stg @C[3],$A[4][1]($src) # R[4][1] = ~C[1] ^ ( C[2] | C[3])
  280. tmll $iotas,255
  281. jnz .Loop
  282. l${g} %r14,$SIZE_T*14($sp)
  283. br %r14
  284. .size __KeccakF1600,.-__KeccakF1600
  285. ___
  286. }
  # Emit KeccakF1600 (no .globl is emitted for it here) together with the
  # local alias .LKeccakF1600 that SHA3_squeeze branches to.  The state
  # pointer arrives in $src (%r2).  The routine:
  #   - saves %r6-%r15 in the caller's frame, allocates $frame bytes and
  #     stores the previous stack pointer at 0($sp);
  #   - complements six lanes in place -- A[0][1], A[0][2], A[1][3],
  #     A[2][2], A[3][2], A[4][0] -- by XORing each with -1 (the header
  #     calls this scheme "lane complementing"); the stmg writes A[0][1]
  #     and A[0][2] with one instruction because both the lanes and the
  #     registers @D[0]/@D[1] are adjacent;
  #   - points $dst at $stdframe($sp), the 25*8-byte area that $frame
  #     reserves above $stdframe, and calls __KeccakF1600;
  #   - complements the same six lanes again, then restores %r6-%r15
  #     (which also restores the stack pointer) and returns.
  287. {
  288. $code.=<<___;
  289. .type KeccakF1600,\@function
  290. .align 32
  291. KeccakF1600:
  292. .LKeccakF1600:
  293. lghi %r1,-$frame
  294. stm${g} %r6,%r15,$SIZE_T*6($sp)
  295. lgr %r0,$sp
  296. la $sp,0(%r1,$sp)
  297. st${g} %r0,0($sp)
  298. lghi @D[0],-1 # no 'not' instruction :-(
  299. lghi @D[1],-1
  300. lghi @D[2],-1
  301. lghi @D[3],-1
  302. lghi @D[4],-1
  303. lghi @T[0],-1
  304. xg @D[0],$A[0][1]($src)
  305. xg @D[1],$A[0][2]($src)
  306. xg @D[2],$A[1][3]($src)
  307. xg @D[3],$A[2][2]($src)
  308. xg @D[4],$A[3][2]($src)
  309. xg @T[0],$A[4][0]($src)
  310. stmg @D[0],@D[1],$A[0][1]($src)
  311. stg @D[2],$A[1][3]($src)
  312. stg @D[3],$A[2][2]($src)
  313. stg @D[4],$A[3][2]($src)
  314. stg @T[0],$A[4][0]($src)
  315. la $dst,$stdframe($sp)
  316. bras %r14,__KeccakF1600
  317. lghi @D[0],-1 # no 'not' instruction :-(
  318. lghi @D[1],-1
  319. lghi @D[2],-1
  320. lghi @D[3],-1
  321. lghi @D[4],-1
  322. lghi @T[0],-1
  323. xg @D[0],$A[0][1]($src)
  324. xg @D[1],$A[0][2]($src)
  325. xg @D[2],$A[1][3]($src)
  326. xg @D[3],$A[2][2]($src)
  327. xg @D[4],$A[3][2]($src)
  328. xg @T[0],$A[4][0]($src)
  329. stmg @D[0],@D[1],$A[0][1]($src)
  330. stg @D[2],$A[1][3]($src)
  331. stg @D[3],$A[2][2]($src)
  332. stg @D[4],$A[3][2]($src)
  333. stg @T[0],$A[4][0]($src)
  334. lm${g} %r6,%r15,$frame+6*$SIZE_T($sp)
  335. br %r14
  336. .size KeccakF1600,.-KeccakF1600
  337. ___
  338. }
  # Emit the global SHA3_absorb.  Arguments arrive in %r2-%r5 and are named
  # $A_flat (state; the same register as $src), $inp, $len and $bsz.
  #   - Prologue: save %r5-%r15 in the caller's frame (so the original $bsz
  #     is on the stack), allocate $frame bytes, then complement the same
  #     six lanes that KeccakF1600 complements.
  #   - .Loop_absorb: while $len >= $bsz (unsigned compare), turn $bsz into
  #     a lane count (>> 3) and XOR that many byte-reversed (lrvg) 8-byte
  #     input words into consecutive state lanes, subtracting 8 from $len
  #     per lane.  $inp/$len are then stored in the two slots directly
  #     below the saved %r5, __KeccakF1600 is called with $dst pointing at
  #     $stdframe($sp), and a single lm reloads $inp, $len and the
  #     original $bsz from those three adjacent slots.
  #   - .Ldone_absorb: undo the lane complement, return the unprocessed
  #     $len in %r2 and restore %r6-%r15.
  # NOTE(review): brct would wrap if $bsz >> 3 were 0; presumably callers
  # always pass a block size of at least 8 -- confirm.
  339. { my ($A_flat,$inp,$len,$bsz) = map("%r$_",(2..5));
  340. $code.=<<___;
  341. .globl SHA3_absorb
  342. .type SHA3_absorb,\@function
  343. .align 32
  344. SHA3_absorb:
  345. lghi %r1,-$frame
  346. stm${g} %r5,%r15,$SIZE_T*5($sp)
  347. lgr %r0,$sp
  348. la $sp,0(%r1,$sp)
  349. st${g} %r0,0($sp)
  350. lghi @D[0],-1 # no 'not' instruction :-(
  351. lghi @D[1],-1
  352. lghi @D[2],-1
  353. lghi @D[3],-1
  354. lghi @D[4],-1
  355. lghi @T[0],-1
  356. xg @D[0],$A[0][1]($src)
  357. xg @D[1],$A[0][2]($src)
  358. xg @D[2],$A[1][3]($src)
  359. xg @D[3],$A[2][2]($src)
  360. xg @D[4],$A[3][2]($src)
  361. xg @T[0],$A[4][0]($src)
  362. stmg @D[0],@D[1],$A[0][1]($src)
  363. stg @D[2],$A[1][3]($src)
  364. stg @D[3],$A[2][2]($src)
  365. stg @D[4],$A[3][2]($src)
  366. stg @T[0],$A[4][0]($src)
  367. .Loop_absorb:
  368. cl${g}r $len,$bsz
  369. jl .Ldone_absorb
  370. srl${g} $bsz,3
  371. la %r1,0($A_flat)
  372. .Lblock_absorb:
  373. lrvg %r0,0($inp)
  374. la $inp,8($inp)
  375. xg %r0,0(%r1)
  376. a${g}hi $len,-8
  377. stg %r0,0(%r1)
  378. la %r1,8(%r1)
  379. brct $bsz,.Lblock_absorb
  380. stm${g} $inp,$len,$frame+3*$SIZE_T($sp)
  381. la $dst,$stdframe($sp)
  382. bras %r14,__KeccakF1600
  383. lm${g} $inp,$bsz,$frame+3*$SIZE_T($sp)
  384. j .Loop_absorb
  385. .align 16
  386. .Ldone_absorb:
  387. lghi @D[0],-1 # no 'not' instruction :-(
  388. lghi @D[1],-1
  389. lghi @D[2],-1
  390. lghi @D[3],-1
  391. lghi @D[4],-1
  392. lghi @T[0],-1
  393. xg @D[0],$A[0][1]($src)
  394. xg @D[1],$A[0][2]($src)
  395. xg @D[2],$A[1][3]($src)
  396. xg @D[3],$A[2][2]($src)
  397. xg @D[4],$A[3][2]($src)
  398. xg @T[0],$A[4][0]($src)
  399. stmg @D[0],@D[1],$A[0][1]($src)
  400. stg @D[2],$A[1][3]($src)
  401. stg @D[3],$A[2][2]($src)
  402. stg @D[4],$A[3][2]($src)
  403. stg @T[0],$A[4][0]($src)
  404. lgr %r2,$len # return value
  405. lm${g} %r6,%r15,$frame+6*$SIZE_T($sp)
  406. br %r14
  407. .size SHA3_absorb,.-SHA3_absorb
  408. ___
  409. }
  # Emit the global SHA3_squeeze.  Arguments arrive in %r2-%r5 and are named
  # $A_flat (state), $out, $len and $bsz.  No new frame is allocated; the
  # routine uses slots of the caller's frame: %r14 at 2*$SIZE_T($sp),
  # $out/$len at 3*$SIZE_T and 4*$SIZE_T, and the lane count ($bsz >> 3) at
  # 5*$SIZE_T.  While running, %r14 holds the constant 8 and %r1 walks the
  # state.
  #   - .Loop_squeeze: if fewer than 8 bytes remain, go to the tail.
  #     Otherwise store one byte-reversed (lrvg) lane to $out and subtract
  #     8 from $len; finish if that reaches zero.  When the lane count runs
  #     out, save $out/$len, call .LKeccakF1600 to permute the state, reload
  #     $out, $len and the saved lane count with one lm, and start again
  #     from the first lane.
  #   - .Ltail_squeeze: load the next lane without byte reversal and emit
  #     its low byte $len times, shifting right by 8 after each byte.  The
  #     two-operand "srlg %r0,8" is rewritten to the three-operand form by
  #     the substitution applied to $code at the end of the script.
  # NOTE(review): a $len of 0 on entry also takes the jl to .Ltail_squeeze,
  # where one byte is stored and brct decrements before testing; presumably
  # callers never request zero bytes -- confirm.
  410. { my ($A_flat,$out,$len,$bsz) = map("%r$_",(2..5));
  411. $code.=<<___;
  412. .globl SHA3_squeeze
  413. .type SHA3_squeeze,\@function
  414. .align 32
  415. SHA3_squeeze:
  416. srl${g} $bsz,3
  417. st${g} %r14,2*$SIZE_T($sp)
  418. lghi %r14,8
  419. st${g} $bsz,5*$SIZE_T($sp)
  420. la %r1,0($A_flat)
  421. j .Loop_squeeze
  422. .align 16
  423. .Loop_squeeze:
  424. cl${g}r $len,%r14
  425. jl .Ltail_squeeze
  426. lrvg %r0,0(%r1)
  427. la %r1,8(%r1)
  428. stg %r0,0($out)
  429. la $out,8($out)
  430. a${g}hi $len,-8 # len -= 8
  431. jz .Ldone_squeeze
  432. brct $bsz,.Loop_squeeze # bsz--
  433. stm${g} $out,$len,3*$SIZE_T($sp)
  434. bras %r14,.LKeccakF1600
  435. lm${g} $out,$bsz,3*$SIZE_T($sp)
  436. lghi %r14,8
  437. la %r1,0($A_flat)
  438. j .Loop_squeeze
  439. .Ltail_squeeze:
  440. lg %r0,0(%r1)
  441. .Loop_tail_squeeze:
  442. stc %r0,0($out)
  443. la $out,1($out)
  444. srlg %r0,8
  445. brct $len,.Loop_tail_squeeze
  446. .Ldone_squeeze:
  447. l${g} %r14,2*$SIZE_T($sp)
  448. br %r14
  449. .size SHA3_squeeze,.-SHA3_squeeze
  450. ___
  451. }
  # Round-constant table, one 64-bit constant per round (24 entries).  The
  # 256-byte alignment plus the eight zero .quad entries (64 bytes) in front
  # put the end of the 192-byte table on a 256-byte boundary.  That is what
  # lets __KeccakF1600 finish its loop with "tmll $iotas,255": the low eight
  # bits of the pointer become zero exactly when it has stepped past the
  # last constant.  Do not add, remove or reorder entries without revisiting
  # that test.
  452. $code.=<<___;
  453. .align 256
  454. .quad 0,0,0,0,0,0,0,0
  455. .type iotas,\@object
  456. iotas:
  457. .quad 0x0000000000000001
  458. .quad 0x0000000000008082
  459. .quad 0x800000000000808a
  460. .quad 0x8000000080008000
  461. .quad 0x000000000000808b
  462. .quad 0x0000000080000001
  463. .quad 0x8000000080008081
  464. .quad 0x8000000000008009
  465. .quad 0x000000000000008a
  466. .quad 0x0000000000000088
  467. .quad 0x0000000080008009
  468. .quad 0x000000008000000a
  469. .quad 0x000000008000808b
  470. .quad 0x800000000000008b
  471. .quad 0x8000000000008089
  472. .quad 0x8000000000008003
  473. .quad 0x8000000000008002
  474. .quad 0x8000000000000080
  475. .quad 0x000000000000800a
  476. .quad 0x800000008000000a
  477. .quad 0x8000000080008081
  478. .quad 0x8000000000008080
  479. .quad 0x0000000080000001
  480. .quad 0x8000000080008008
  481. .size iotas,.-iotas
  482. .asciz "Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by <appro\@openssl.org>"
  483. ___
  # Unlike the 32-bit shifts, srlg takes three operands, but the code above
  # writes it in two-operand style; repeat the register so that
  # "srlg %rN,8" becomes "srlg %rN,%rN,8".
  $code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm;

  print $code;

  # Output is buffered, so a write failure (full disk, closed pipe) may only
  # be reported when the handle is closed.  Treat that as fatal so the build
  # never continues with a truncated assembly file.
  close STDOUT or die "error closing STDOUT: $!";