aes-riscv64-zvkb-zvkned.pl

#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)
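#
# Note: the unit-stride vector accesses below (vle32.v/vse32.v) are issued on
# in/out/key/iv pointers with no alignment guarantee, which is presumably why
# Zicclsm (misaligned load/store support) is listed as a requirement.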
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___
################################################################################
# void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in,
#                                             unsigned char *out, size_t blocks,
#                                             const void *key,
#                                             const unsigned char ivec[16]);
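#
# A minimal scalar sketch of the assumed CTR32 contract (OpenSSL's ctr128
# convention: only the low 32 bits of the big-endian counter block are
# incremented, and the caller ensures they do not wrap within one call):
#
#     /* illustrative C pseudocode, not part of the generated code */
#     for (size_t i = 0; i < blocks; i++) {
#         unsigned char ks[16];
#         AES_encrypt(ivec, ks, key);            /* one block encryption */
#         for (int j = 0; j < 16; j++)
#             out[16 * i + j] = in[16 * i + j] ^ ks[j];
#         for (int j = 15; j >= 12 && ++ivec[j] == 0; j--)
#             ;                                  /* bump low 32 bits, BE */
#     }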
{
my ($INP, $OUTP, $BLOCK_NUM, $KEYP, $IVP) = ("a0", "a1", "a2", "a3", "a4");
my ($T0, $T1, $T2, $T3) = ("t0", "t1", "t2", "t3");
my ($VL) = ("t4");
my ($LEN32) = ("t5");
my ($CTR) = ("t6");
my ($MASK) = ("v0");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));
# Prepare the AES ctr input data into v16.
sub init_aes_ctr_input {
    my $code=<<___;
    # Setup mask into v0
    # The mask pattern for 4*N-th elements
    # mask v0: [000100010001....]
    # Note:
    #     We could setup the mask just for the maximum element length instead
    #     of the VLMAX.
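    # Worked example, assuming VLEN=128: splatting 0b10001000 with SEW=8
    # sets bits 3 and 7 of every mask byte, i.e. mask bits 3, 7, 11, 15, ...
    # When the mask is consumed at SEW=32 below, it selects element indices
    # 3, 7, 11, ..., which are exactly the big-endian counter words (the
    # 4th 32-bit word of each 128-bit block).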
    li $T0, 0b10001000
    @{[vsetvli $T2, "zero", "e8", "m1", "ta", "ma"]}
    @{[vmv_v_x $MASK, $T0]}

    # Load IV.
    # v31:[IV0, IV1, IV2, big-endian count]
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V31, $IVP]}

    # Convert the big-endian counter into little-endian.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "mu"]}
    @{[vrev8_v $V31, $V31, $MASK]}

    # Splat the IV to v16
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vmv_v_i $V16, 0]}
    @{[vaesz_vs $V16, $V31]}
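    # vaesz.vs XORs the single element group of v31 into every element
    # group of the destination, so zeroing v16 and then applying vaesz.vs
    # broadcasts the IV block across the whole LMUL=4 group (0 ^ IV = IV).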
    # Prepare the ctr pattern into v20
    # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
    @{[viota_m $V20, $MASK, $MASK]}
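    # viota.m writes, at each active element, the number of mask bits set
    # before it: 0 at element 3, 1 at element 7, 2 at element 11, and so
    # on. These are the per-block counter offsets.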
    # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...]
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    @{[vadd_vv $V16, $V16, $V20, $MASK]}
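    # With the "mu" (mask-undisturbed) policy the masked vadd.vv updates
    # only the counter words; the IV0/IV1/IV2 words of each block stay
    # unchanged.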
___

    return $code;
}
$code .= <<___;
.p2align 3
.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks
.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,\@function
rv64i_zvkb_zvkned_ctr32_encrypt_blocks:
    beqz $BLOCK_NUM, 1f

    # Load number of rounds
    lwu $T0, 240($KEYP)

    li $T1, 14
    li $T2, 12
    li $T3, 10

    slli $LEN32, $BLOCK_NUM, 2

    beq $T0, $T1, ctr32_encrypt_blocks_256
    beq $T0, $T2, ctr32_encrypt_blocks_192
    beq $T0, $T3, ctr32_encrypt_blocks_128

1:
    ret
.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks
___
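
# The dispatcher above relies on the AES_KEY layout: the round count sits at
# byte offset 240 of the key schedule (10/12/14 rounds for AES-128/192/256),
# hence the lwu from 240($KEYP). LEN32 = blocks * 4 is the total number of
# 32-bit words to process. Each variant below consumes it in e32/LMUL=4
# strips: round keys stay in single registers (v1 and up), v16-v19 hold the
# little-endian counters, v20-v23 the plaintext and v24-v27 the
# keystream/ciphertext.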
$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_128:
    # Load all 11 round keys to v1-v11 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
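    # The new vl from vsetvli counts e32 elements, so the byte advance for
    # the pointers is vl*4 and the number of 128-bit blocks consumed is
    # vl/4; the latter is kept as the counter increment applied at label 1
    # on the next iteration.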
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0

    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}
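    # Round sequence: vaesz.vs performs the initial AddRoundKey, each
    # vaesem.vs one full middle round (SubBytes, ShiftRows, MixColumns,
    # AddRoundKey), and vaesef.vs the final round without MixColumns.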
    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesef_vs $V24, $V11]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}

    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128
___
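
# The 192- and 256-bit variants below repeat the same strip-mined loop; they
# differ from the 128-bit path only in loading two or four more round keys
# and issuing a matching number of extra vaesem.vs middle rounds.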
$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_192:
    # Load all 13 round keys to v1-v13 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0

    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesef_vs $V24, $V13]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}

    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192
___
$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_256:
    # Load all 15 round keys to v1-v15 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V14, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V15, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0

    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesem_vs $V24, $V13]}
    @{[vaesem_vs $V24, $V14]}
    @{[vaesef_vs $V24, $V15]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}

    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256
___
}

print $code;

close STDOUT or die "error closing STDOUT: $!";