
#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V vector ('V') with VLEN >= 128
# - RISC-V vector crypto AES extension ('Zvkned')
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;
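
# Note: the helpers imported from riscv.pm (e.g. vle32_v, vaeskf1_vi) expand
# into the vector and vector-crypto instructions used below, so the emitted
# assembly is presumably usable even with toolchains that predate Zvkned
# support. See ../../perlasm/riscv.pm for the exact encodings.
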
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___
################################################################################
# int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits,
#                                  AES_KEY *key)
# int rv64i_zvkned_set_decrypt_key(const unsigned char *userKey, const int bits,
#                                  AES_KEY *key)
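#
# Both entry points expand the user key into key->rd_key and store the round
# count at byte offset 240 (the rounds field of OpenSSL's AES_KEY). They
# return 1 on success, -1 if userKey or key is NULL, and -2 for an
# unsupported key length (only 128- and 256-bit keys are handled here).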
{
my ($UKEY,$BITS,$KEYP) = ("a0", "a1", "a2");
my ($T0,$T1,$T4) = ("t1", "t2", "t4");
my ($v0,  $v1,  $v2,  $v3,  $v4,  $v5,  $v6,
    $v7,  $v8,  $v9,  $v10, $v11, $v12,
    $v13, $v14, $v15, $v16, $v17, $v18,
    $v19, $v20, $v21, $v22, $v23, $v24,
) = map("v$_",(0..24));
$code .= <<___;
.p2align 3
.globl rv64i_zvkned_set_encrypt_key
.type rv64i_zvkned_set_encrypt_key,\@function
rv64i_zvkned_set_encrypt_key:
    beqz $UKEY, L_fail_m1
    beqz $KEYP, L_fail_m1

    # Get proper routine for key size
    li $T0, 256
    beq $BITS, $T0, L_set_key_256
    li $T0, 128
    beq $BITS, $T0, L_set_key_128

    j L_fail_m2
.size rv64i_zvkned_set_encrypt_key,.-rv64i_zvkned_set_encrypt_key
___

$code .= <<___;
.p2align 3
.globl rv64i_zvkned_set_decrypt_key
.type rv64i_zvkned_set_decrypt_key,\@function
rv64i_zvkned_set_decrypt_key:
    beqz $UKEY, L_fail_m1
    beqz $KEYP, L_fail_m1

    # Get proper routine for key size
    li $T0, 256
    beq $BITS, $T0, L_set_key_256
    li $T0, 128
    beq $BITS, $T0, L_set_key_128

    j L_fail_m2
.size rv64i_zvkned_set_decrypt_key,.-rv64i_zvkned_set_decrypt_key
___
$code .= <<___;
.p2align 3
L_set_key_128:
    # Store the number of rounds
    li $T1, 10
    sw $T1, 240($KEYP)

    @{[vsetivli__x0_4_e32_m1_tu_mu]}

    # Load the key
    @{[vle32_v $v10, ($UKEY)]}
    # Generate keys for rounds 2-11 into registers v11-v20.
    # vaeskf1.vi computes the next AES-128 round key from the previous one,
    # with the immediate giving the round number.
    @{[vaeskf1_vi $v11, $v10, 1]}   # v11 <- rk2  (w[ 4, 7])
    @{[vaeskf1_vi $v12, $v11, 2]}   # v12 <- rk3  (w[ 8,11])
    @{[vaeskf1_vi $v13, $v12, 3]}   # v13 <- rk4  (w[12,15])
    @{[vaeskf1_vi $v14, $v13, 4]}   # v14 <- rk5  (w[16,19])
    @{[vaeskf1_vi $v15, $v14, 5]}   # v15 <- rk6  (w[20,23])
    @{[vaeskf1_vi $v16, $v15, 6]}   # v16 <- rk7  (w[24,27])
    @{[vaeskf1_vi $v17, $v16, 7]}   # v17 <- rk8  (w[28,31])
    @{[vaeskf1_vi $v18, $v17, 8]}   # v18 <- rk9  (w[32,35])
    @{[vaeskf1_vi $v19, $v18, 9]}   # v19 <- rk10 (w[36,39])
    @{[vaeskf1_vi $v20, $v19, 10]}  # v20 <- rk11 (w[40,43])

    # Store the round keys
    @{[vse32_v $v10, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v11, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v12, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v13, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v14, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v15, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v16, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v17, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v18, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v19, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v20, ($KEYP)]}

    li a0, 1
    ret
.size L_set_key_128,.-L_set_key_128
___
$code .= <<___;
.p2align 3
L_set_key_256:
    # Store the number of rounds
    li $T1, 14
    sw $T1, 240($KEYP)

    @{[vsetivli__x0_4_e32_m1_tu_mu]}

    # Load the key
    @{[vle32_v $v10, ($UKEY)]}
    addi $UKEY, $UKEY, 16
    @{[vle32_v $v11, ($UKEY)]}
    # Generate keys for rounds 3-15 into registers v12-v24. vaeskf2.vi takes
    # the round key from two rounds back in vd and the previous round key in
    # vs2, with the immediate giving the round number, so each new round key
    # is seeded by copying the older key before the update.
    @{[vmv_v_v $v12, $v10]}
    @{[vaeskf2_vi $v12, $v11, 2]}   # v12 <- rk3  (w[ 8,11])
    @{[vmv_v_v $v13, $v11]}
    @{[vaeskf2_vi $v13, $v12, 3]}   # v13 <- rk4  (w[12,15])
    @{[vmv_v_v $v14, $v12]}
    @{[vaeskf2_vi $v14, $v13, 4]}   # v14 <- rk5  (w[16,19])
    @{[vmv_v_v $v15, $v13]}
    @{[vaeskf2_vi $v15, $v14, 5]}   # v15 <- rk6  (w[20,23])
    @{[vmv_v_v $v16, $v14]}
    @{[vaeskf2_vi $v16, $v15, 6]}   # v16 <- rk7  (w[24,27])
    @{[vmv_v_v $v17, $v15]}
    @{[vaeskf2_vi $v17, $v16, 7]}   # v17 <- rk8  (w[28,31])
    @{[vmv_v_v $v18, $v16]}
    @{[vaeskf2_vi $v18, $v17, 8]}   # v18 <- rk9  (w[32,35])
    @{[vmv_v_v $v19, $v17]}
    @{[vaeskf2_vi $v19, $v18, 9]}   # v19 <- rk10 (w[36,39])
    @{[vmv_v_v $v20, $v18]}
    @{[vaeskf2_vi $v20, $v19, 10]}  # v20 <- rk11 (w[40,43])
    @{[vmv_v_v $v21, $v19]}
    @{[vaeskf2_vi $v21, $v20, 11]}  # v21 <- rk12 (w[44,47])
    @{[vmv_v_v $v22, $v20]}
    @{[vaeskf2_vi $v22, $v21, 12]}  # v22 <- rk13 (w[48,51])
    @{[vmv_v_v $v23, $v21]}
    @{[vaeskf2_vi $v23, $v22, 13]}  # v23 <- rk14 (w[52,55])
    @{[vmv_v_v $v24, $v22]}
    @{[vaeskf2_vi $v24, $v23, 14]}  # v24 <- rk15 (w[56,59])

    # Store the round keys
    @{[vse32_v $v10, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v11, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v12, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v13, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v14, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v15, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v16, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v17, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v18, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v19, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v20, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v21, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v22, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v23, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vse32_v $v24, ($KEYP)]}

    li a0, 1
    ret
.size L_set_key_256,.-L_set_key_256
___
}

################################################################################
# void rv64i_zvkned_encrypt(const unsigned char *in, unsigned char *out,
#                           const AES_KEY *key);
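#
# Encrypts one 16-byte block: all round keys are loaded into v10-v20 (AES-128)
# or v10-v24 (AES-256), and the block is carried in v1 through vaesz.vs,
# vaesem.vs and vaesef.vs. The round count read from offset 240 of the key
# selects between the two paths.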
{
my ($INP,$OUTP,$KEYP) = ("a0", "a1", "a2");
my ($T0,$T1, $rounds, $T6) = ("a3", "a4", "t5", "t6");
my ($v0,  $v1,  $v2,  $v3,  $v4,  $v5,  $v6,
    $v7,  $v8,  $v9,  $v10, $v11, $v12,
    $v13, $v14, $v15, $v16, $v17, $v18,
    $v19, $v20, $v21, $v22, $v23, $v24,
) = map("v$_",(0..24));
$code .= <<___;
.p2align 3
.globl rv64i_zvkned_encrypt
.type rv64i_zvkned_encrypt,\@function
rv64i_zvkned_encrypt:
    # Load number of rounds
    lwu $rounds, 240($KEYP)

    # Get proper routine for key size
    li $T6, 14
    beq $rounds, $T6, L_enc_256
    li $T6, 10
    beq $rounds, $T6, L_enc_128

    j L_fail_m2
.size rv64i_zvkned_encrypt,.-rv64i_zvkned_encrypt
___
$code .= <<___;
.p2align 3
L_enc_128:
    @{[vsetivli__x0_4_e32_m1_tu_mu]}

    @{[vle32_v $v10, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v11, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v12, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v13, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v14, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v15, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v16, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v17, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v18, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v19, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v20, ($KEYP)]}

    @{[vle32_v $v1, ($INP)]}

    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
    @{[vaesem_vs $v1, $v11]}   # with round key w[ 4, 7]
    @{[vaesem_vs $v1, $v12]}   # with round key w[ 8,11]
    @{[vaesem_vs $v1, $v13]}   # with round key w[12,15]
    @{[vaesem_vs $v1, $v14]}   # with round key w[16,19]
    @{[vaesem_vs $v1, $v15]}   # with round key w[20,23]
    @{[vaesem_vs $v1, $v16]}   # with round key w[24,27]
    @{[vaesem_vs $v1, $v17]}   # with round key w[28,31]
    @{[vaesem_vs $v1, $v18]}   # with round key w[32,35]
    @{[vaesem_vs $v1, $v19]}   # with round key w[36,39]
    @{[vaesef_vs $v1, $v20]}   # with round key w[40,43]

    @{[vse32_v $v1, ($OUTP)]}

    ret
.size L_enc_128,.-L_enc_128
___
$code .= <<___;
.p2align 3
L_enc_256:
    @{[vsetivli__x0_4_e32_m1_tu_mu]}

    @{[vle32_v $v10, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v11, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v12, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v13, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v14, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v15, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v16, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v17, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v18, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v19, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v20, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v21, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v22, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v23, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v24, ($KEYP)]}

    @{[vle32_v $v1, ($INP)]}
    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
    @{[vaesem_vs $v1, $v11]}   # with round key w[ 4, 7]
    @{[vaesem_vs $v1, $v12]}   # with round key w[ 8,11]
    @{[vaesem_vs $v1, $v13]}   # with round key w[12,15]
    @{[vaesem_vs $v1, $v14]}   # with round key w[16,19]
    @{[vaesem_vs $v1, $v15]}   # with round key w[20,23]
    @{[vaesem_vs $v1, $v16]}   # with round key w[24,27]
    @{[vaesem_vs $v1, $v17]}   # with round key w[28,31]
    @{[vaesem_vs $v1, $v18]}   # with round key w[32,35]
    @{[vaesem_vs $v1, $v19]}   # with round key w[36,39]
    @{[vaesem_vs $v1, $v20]}   # with round key w[40,43]
    @{[vaesem_vs $v1, $v21]}   # with round key w[44,47]
    @{[vaesem_vs $v1, $v22]}   # with round key w[48,51]
    @{[vaesem_vs $v1, $v23]}   # with round key w[52,55]
    @{[vaesef_vs $v1, $v24]}   # with round key w[56,59]

    @{[vse32_v $v1, ($OUTP)]}

    ret
.size L_enc_256,.-L_enc_256
___
}

################################################################################
# void rv64i_zvkned_decrypt(const unsigned char *in, unsigned char *out,
#                           const AES_KEY *key);
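#
# Decrypts one 16-byte block. The schedule produced by the set_*_key routines
# above is the forward (encryption) schedule; decryption simply walks it in
# reverse with vaesz.vs, vaesdm.vs and vaesdf.vs, which apply the inverse
# transformations internally, so no separate decryption key schedule is needed.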
{
my ($INP,$OUTP,$KEYP) = ("a0", "a1", "a2");
my ($T0,$T1, $rounds, $T6) = ("a3", "a4", "t5", "t6");
my ($v0,  $v1,  $v2,  $v3,  $v4,  $v5,  $v6,
    $v7,  $v8,  $v9,  $v10, $v11, $v12,
    $v13, $v14, $v15, $v16, $v17, $v18,
    $v19, $v20, $v21, $v22, $v23, $v24,
) = map("v$_",(0..24));
$code .= <<___;
.p2align 3
.globl rv64i_zvkned_decrypt
.type rv64i_zvkned_decrypt,\@function
rv64i_zvkned_decrypt:
    # Load number of rounds
    lwu $rounds, 240($KEYP)

    # Get proper routine for key size
    li $T6, 14
    beq $rounds, $T6, L_dec_256
    li $T6, 10
    beq $rounds, $T6, L_dec_128

    j L_fail_m2
.size rv64i_zvkned_decrypt,.-rv64i_zvkned_decrypt
___
$code .= <<___;
.p2align 3
L_dec_128:
    @{[vsetivli__x0_4_e32_m1_tu_mu]}

    @{[vle32_v $v10, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v11, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v12, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v13, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v14, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v15, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v16, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v17, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v18, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v19, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v20, ($KEYP)]}

    @{[vle32_v $v1, ($INP)]}
    @{[vaesz_vs $v1, $v20]}    # with round key w[40,43]
    @{[vaesdm_vs $v1, $v19]}   # with round key w[36,39]
    @{[vaesdm_vs $v1, $v18]}   # with round key w[32,35]
    @{[vaesdm_vs $v1, $v17]}   # with round key w[28,31]
    @{[vaesdm_vs $v1, $v16]}   # with round key w[24,27]
    @{[vaesdm_vs $v1, $v15]}   # with round key w[20,23]
    @{[vaesdm_vs $v1, $v14]}   # with round key w[16,19]
    @{[vaesdm_vs $v1, $v13]}   # with round key w[12,15]
    @{[vaesdm_vs $v1, $v12]}   # with round key w[ 8,11]
    @{[vaesdm_vs $v1, $v11]}   # with round key w[ 4, 7]
    @{[vaesdf_vs $v1, $v10]}   # with round key w[ 0, 3]

    @{[vse32_v $v1, ($OUTP)]}

    ret
.size L_dec_128,.-L_dec_128
___
$code .= <<___;
.p2align 3
L_dec_256:
    @{[vsetivli__x0_4_e32_m1_tu_mu]}

    @{[vle32_v $v10, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v11, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v12, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v13, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v14, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v15, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v16, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v17, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v18, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v19, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v20, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v21, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v22, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v23, ($KEYP)]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $v24, ($KEYP)]}

    @{[vle32_v $v1, ($INP)]}

    @{[vaesz_vs $v1, $v24]}    # with round key w[56,59]
    @{[vaesdm_vs $v1, $v23]}   # with round key w[52,55]
    @{[vaesdm_vs $v1, $v22]}   # with round key w[48,51]
    @{[vaesdm_vs $v1, $v21]}   # with round key w[44,47]
    @{[vaesdm_vs $v1, $v20]}   # with round key w[40,43]
    @{[vaesdm_vs $v1, $v19]}   # with round key w[36,39]
    @{[vaesdm_vs $v1, $v18]}   # with round key w[32,35]
    @{[vaesdm_vs $v1, $v17]}   # with round key w[28,31]
    @{[vaesdm_vs $v1, $v16]}   # with round key w[24,27]
    @{[vaesdm_vs $v1, $v15]}   # with round key w[20,23]
    @{[vaesdm_vs $v1, $v14]}   # with round key w[16,19]
    @{[vaesdm_vs $v1, $v13]}   # with round key w[12,15]
    @{[vaesdm_vs $v1, $v12]}   # with round key w[ 8,11]
    @{[vaesdm_vs $v1, $v11]}   # with round key w[ 4, 7]
    @{[vaesdf_vs $v1, $v10]}   # with round key w[ 0, 3]

    @{[vse32_v $v1, ($OUTP)]}

    ret
.size L_dec_256,.-L_dec_256
___
}

$code .= <<___;
L_fail_m1:
    li a0, -1
    ret
.size L_fail_m1,.-L_fail_m1

L_fail_m2:
    li a0, -2
    ret
.size L_fail_m2,.-L_fail_m2
___

print $code;

close STDOUT or die "error closing STDOUT: $!";