sha256-riscv64-zvkb-zvknha_or_zvknhb.pl 11 KB


  1. #! /usr/bin/env perl
  2. # This file is dual-licensed, meaning that you can use it under your
  3. # choice of either of the following two licenses:
  4. #
  5. # Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
  6. #
  7. # Licensed under the Apache License 2.0 (the "License"). You can obtain
  8. # a copy in the file LICENSE in the source distribution or at
  9. # https://www.openssl.org/source/license.html
  10. #
  11. # or
  12. #
  13. # Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
  14. # Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
  15. # All rights reserved.
  16. #
  17. # Redistribution and use in source and binary forms, with or without
  18. # modification, are permitted provided that the following conditions
  19. # are met:
  20. # 1. Redistributions of source code must retain the above copyright
  21. # notice, this list of conditions and the following disclaimer.
  22. # 2. Redistributions in binary form must reproduce the above copyright
  23. # notice, this list of conditions and the following disclaimer in the
  24. # documentation and/or other materials provided with the distribution.
  25. #
  26. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  27. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  28. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  29. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  30. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  31. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  32. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  33. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  34. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  35. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  36. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  37. # The generated code of this file depends on the following RISC-V extensions:
  38. # - RV64I
  39. # - RISC-V Vector ('V') with VLEN >= 128
  40. # - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
  41. # - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
  42. use strict;
  43. use warnings;
  44. use FindBin qw($Bin);
  45. use lib "$Bin";
  46. use lib "$Bin/../../perlasm";
  47. use riscv;
  # Command-line handling:
  #   $output  - the last argument, taken only when it looks like a file
  #              name (it ends in ".<extension>");
  #   $flavour - the first remaining argument, taken only when it contains
  #              no dot, i.e. it does not look like a file name.
  # Either variable stays undefined when no matching argument is present.
  my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
  my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

  # Redirect STDOUT to the requested file. The three-argument form keeps the
  # file name from ever being parsed as an open mode, and a failed open now
  # aborts instead of silently sending the assembly to the inherited STDOUT.
  if ($output) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }
  # Assembly text accumulated for output; it starts with the .text directive.
  my $code = ".text\n";

  # Names of the 32 vector registers: $V0 is "v0" ... $V31 is "v31".
  my (
      $V0,  $V1,  $V2,  $V3,  $V4,  $V5,  $V6,  $V7,
      $V8,  $V9,  $V10, $V11, $V12, $V13, $V14, $V15,
      $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
      $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
  ) = map { 'v' . $_ } (0 .. 31);

  # Symbol name of the SHA-256 round-constant table.
  my $K256 = "K256";

  # Function arguments and scalar scratch registers.
  my $H             = "a0";    # hash state pointer (first argument)
  my $INP           = "a1";    # message input pointer (second argument)
  my $LEN           = "a2";    # remaining length counter (third argument)
  my $KT            = "a3";    # walks the K256 table while it is loaded
  my $H2            = "t3";    # second base address into the hash state
  my $INDEX_PATTERN = "t4";    # packed byte offsets for the indexed load
  # Build the assembly that loads the round constants into v10..v25.
  #
  # The emitted code puts the address of $K256 into $KT and then issues one
  # vle32_v per destination register, advancing $KT by 16 bytes between
  # consecutive loads (no advance follows the final load).
  #
  # Takes no arguments; returns the assembly text as a single string.
  sub sha_256_load_constant {
      # Destination registers, in the order the table is read.
      my @const_regs = (
          $V10, $V11, $V12, $V13, $V14, $V15, $V16, $V17,
          $V18, $V19, $V20, $V21, $V22, $V23, $V24, $V25,
      );

      # Pointer bump that separates two consecutive loads.
      my $next_group = "addi $KT, $KT, 16\n";

      my @loads;
      for my $vreg (@const_regs) {
          push @loads, "@{[vle32_v($vreg, $KT)]}\n";
      }

      return "la $KT, $K256 # Load round constants K256\n"
           . join($next_group, @loads);
  }
  ################################################################################
  # void sha256_block_data_order_zvkb_zvknha_or_zvknhb(void *c, const void *p, size_t len)
  #
  # Appends the assembly for the block function, followed by the K256
  # round-constant table (64 32-bit words), to $code.
  #
  # Scalar registers: $H (a0) is the hash state, $INP (a1) the message input
  # and $LEN (a2) the loop counter. Every pass through L_round_loop reads
  # 4 x 16 = 64 bytes from $INP and decrements $LEN by one, so $LEN counts
  # 64-byte blocks.
  # NOTE(review): $LEN is decremented before it is tested, so a value of 0
  # would not end the loop on the first pass - presumably callers always
  # pass at least one block; confirm.
  #
  # Vector register usage in the emitted code:
  #   v0        mask used by vmerge (only element 0 selected)
  #   v1-v4     message schedule words of the current block
  #   v5        scratch: schedule words plus round constants, merged words
  #   v6, v7    working state, kept as {f,e,b,a} and {h,g,d,c}
  #   v10-v25   round constants, loaded once by sha_256_load_constant
  #   v26       byte-offset index pattern for the indexed state load/store
  #   v30, v31  copy of the state taken at the start of each block
  #
  # Quad-rounds 0-11 also produce the next four schedule words with
  # vsha2ms_vv; quad-rounds 12-15 only consume words that already exist.
  $code .= <<___;
.p2align 2
.globl sha256_block_data_order_zvkb_zvknha_or_zvknhb
.type sha256_block_data_order_zvkb_zvknha_or_zvknhb,\@function
sha256_block_data_order_zvkb_zvknha_or_zvknhb:
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[sha_256_load_constant]}
# H is stored as {a,b,c,d},{e,f,g,h}, but we need {f,e,b,a},{h,g,d,c}
# The dst vtype is e32m1 and the index vtype is e8mf4.
# We use index-load with the following index pattern at v26.
# i8 index:
# 20, 16, 4, 0
# Instead of setting the i8 index, we could use a single 32bit
# little-endian value to cover the 4xi8 index.
# i32 value:
# 0x 00 04 10 14
li $INDEX_PATTERN, 0x00041014
@{[vsetivli "zero", 1, "e32", "m1", "ta", "ma"]}
@{[vmv_v_x $V26, $INDEX_PATTERN]}
addi $H2, $H, 8
# Use index-load to get {f,e,b,a},{h,g,d,c}
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vluxei8_v $V6, $H, $V26]}
@{[vluxei8_v $V7, $H2, $V26]}
# Setup v0 mask for the vmerge to replace the first word (idx==0) in key-scheduling.
# The AVL is 4 in SHA, so we could use a single e8(8 element masking) for masking.
@{[vsetivli "zero", 1, "e8", "m1", "ta", "ma"]}
@{[vmv_v_i $V0, 0x01]}
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
L_round_loop:
# Decrement length by 1
add $LEN, $LEN, -1
# Keep the current state as we need it later: H' = H+{a',b',c',...,h'}.
@{[vmv_v_v $V30, $V6]}
@{[vmv_v_v $V31, $V7]}
# Load the 512-bits of the message block in v1-v4 and perform
# an endian swap on each 4 bytes element.
@{[vle32_v $V1, $INP]}
@{[vrev8_v $V1, $V1]}
add $INP, $INP, 16
@{[vle32_v $V2, $INP]}
@{[vrev8_v $V2, $V2]}
add $INP, $INP, 16
@{[vle32_v $V3, $INP]}
@{[vrev8_v $V3, $V3]}
add $INP, $INP, 16
@{[vle32_v $V4, $INP]}
@{[vrev8_v $V4, $V4]}
add $INP, $INP, 16
# Quad-round 0 (+0, Wt from oldest to newest in v1->v2->v3->v4)
@{[vadd_vv $V5, $V10, $V1]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V3, $V2, $V0]}
@{[vsha2ms_vv $V1, $V5, $V4]} # Generate W[19:16]
# Quad-round 1 (+1, v2->v3->v4->v1)
@{[vadd_vv $V5, $V11, $V2]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V4, $V3, $V0]}
@{[vsha2ms_vv $V2, $V5, $V1]} # Generate W[23:20]
# Quad-round 2 (+2, v3->v4->v1->v2)
@{[vadd_vv $V5, $V12, $V3]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V1, $V4, $V0]}
@{[vsha2ms_vv $V3, $V5, $V2]} # Generate W[27:24]
# Quad-round 3 (+3, v4->v1->v2->v3)
@{[vadd_vv $V5, $V13, $V4]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V2, $V1, $V0]}
@{[vsha2ms_vv $V4, $V5, $V3]} # Generate W[31:28]
# Quad-round 4 (+0, v1->v2->v3->v4)
@{[vadd_vv $V5, $V14, $V1]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V3, $V2, $V0]}
@{[vsha2ms_vv $V1, $V5, $V4]} # Generate W[35:32]
# Quad-round 5 (+1, v2->v3->v4->v1)
@{[vadd_vv $V5, $V15, $V2]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V4, $V3, $V0]}
@{[vsha2ms_vv $V2, $V5, $V1]} # Generate W[39:36]
# Quad-round 6 (+2, v3->v4->v1->v2)
@{[vadd_vv $V5, $V16, $V3]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V1, $V4, $V0]}
@{[vsha2ms_vv $V3, $V5, $V2]} # Generate W[43:40]
# Quad-round 7 (+3, v4->v1->v2->v3)
@{[vadd_vv $V5, $V17, $V4]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V2, $V1, $V0]}
@{[vsha2ms_vv $V4, $V5, $V3]} # Generate W[47:44]
# Quad-round 8 (+0, v1->v2->v3->v4)
@{[vadd_vv $V5, $V18, $V1]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V3, $V2, $V0]}
@{[vsha2ms_vv $V1, $V5, $V4]} # Generate W[51:48]
# Quad-round 9 (+1, v2->v3->v4->v1)
@{[vadd_vv $V5, $V19, $V2]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V4, $V3, $V0]}
@{[vsha2ms_vv $V2, $V5, $V1]} # Generate W[55:52]
# Quad-round 10 (+2, v3->v4->v1->v2)
@{[vadd_vv $V5, $V20, $V3]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V1, $V4, $V0]}
@{[vsha2ms_vv $V3, $V5, $V2]} # Generate W[59:56]
# Quad-round 11 (+3, v4->v1->v2->v3)
@{[vadd_vv $V5, $V21, $V4]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
@{[vmerge_vvm $V5, $V2, $V1, $V0]}
@{[vsha2ms_vv $V4, $V5, $V3]} # Generate W[63:60]
# Quad-round 12 (+0, v1->v2->v3->v4)
# Note that we stop generating new message schedule words (Wt, v1-13)
# as we already generated all the words we end up consuming (i.e., W[63:60]).
@{[vadd_vv $V5, $V22, $V1]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
# Quad-round 13 (+1, v2->v3->v4->v1)
@{[vadd_vv $V5, $V23, $V2]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
# Quad-round 14 (+2, v3->v4->v1->v2)
@{[vadd_vv $V5, $V24, $V3]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
# Quad-round 15 (+3, v4->v1->v2->v3)
@{[vadd_vv $V5, $V25, $V4]}
@{[vsha2cl_vv $V7, $V6, $V5]}
@{[vsha2ch_vv $V6, $V7, $V5]}
# H' = H+{a',b',c',...,h'}
@{[vadd_vv $V6, $V30, $V6]}
@{[vadd_vv $V7, $V31, $V7]}
bnez $LEN, L_round_loop
# Store {f,e,b,a},{h,g,d,c} back to {a,b,c,d},{e,f,g,h}.
@{[vsuxei8_v $V6, $H, $V26]}
@{[vsuxei8_v $V7, $H2, $V26]}
ret
.size sha256_block_data_order_zvkb_zvknha_or_zvknhb,.-sha256_block_data_order_zvkb_zvknha_or_zvknhb
.p2align 2
.type $K256,\@object
$K256:
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size $K256,.-$K256
___
  # Emit the generated assembly. Both steps are checked: a failed write is
  # reported immediately, and errors on data still sitting in the output
  # buffer only become visible when STDOUT is closed.
  print($code) or die "error writing to STDOUT: $!";
  close STDOUT or die "error closing STDOUT: $!";