#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (Main memory supports misaligned loads/stores)

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___

################################################################################
# void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in,
#                                             unsigned char *out, size_t blocks,
#                                             const void *key,
#                                             const unsigned char ivec[16]);
{
my ($INP, $OUTP, $BLOCK_NUM, $KEYP, $IVP) = ("a0", "a1", "a2", "a3", "a4");
my ($T0, $T1, $T2, $T3) = ("t0", "t1", "t2", "t3");
my ($VL) = ("t4");
my ($LEN32) = ("t5");
my ($CTR) = ("t6");
my ($MASK) = ("v0");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));

# Prepare the AES ctr input data into v16.
sub init_aes_ctr_input {
    my $code=<<___;
    # Setup mask into v0
    # The mask pattern for 4*N-th elements
    # mask v0: [000100010001....]
    # Note:
    #   We could setup the mask just for the maximum element length instead of
    #   the VLMAX.
    li $T0, 0b10001000
    @{[vsetvli $T2, "zero", "e8", "m1", "ta", "ma"]}
    @{[vmv_v_x $MASK, $T0]}

    # Load IV.
    # v31:[IV0, IV1, IV2, big-endian count]
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V31, $IVP]}

    # Convert the big-endian counter into little-endian.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "mu"]}
    @{[vrev8_v $V31, $V31, $MASK]}

    # Splat the IV to v16
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vmv_v_i $V16, 0]}
    @{[vaesz_vs $V16, $V31]}

    # Prepare the ctr pattern into v20
    # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
    @{[viota_m $V20, $MASK, $MASK]}
    # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...]
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    @{[vadd_vv $V16, $V16, $V20, $MASK]}
___

    return $code;
}
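# A minimal worked example of what init_aes_ctr_input builds, assuming
# VLEN=128 and the LMUL=4 grouping used below (one register group = four
# 128-bit blocks = 16 e32 elements; $LEN32 = blocks * 4). With an IV whose
# big-endian counter word equals 5:
#
#   v31 after vrev8:  [IV0, IV1, IV2, 5]    (counter now little-endian)
#   v16 after vaesz:  [IV0,IV1,IV2,5, IV0,IV1,IV2,5, IV0,IV1,IV2,5, ...]
#   v20 after viota:  [  x,  x,  x,0,   x,  x,  x,1,   x,  x,  x,2, ...]
#   v16 after vadd:   [IV0,IV1,IV2,5, IV0,IV1,IV2,6, IV0,IV1,IV2,7, ...]
#
# The v0 mask selects only every 4th e32 element (the counter word), and the
# "mu" (mask-undisturbed) policy leaves the IV words untouched, so each block
# in the group receives a consecutive counter value.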
$code .= <<___;
.p2align 3
.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks
.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,\@function
rv64i_zvkb_zvkned_ctr32_encrypt_blocks:
    beqz $BLOCK_NUM, 1f

    # Load number of rounds
    lwu $T0, 240($KEYP)

    li $T1, 14
    li $T2, 12
    li $T3, 10

    slli $LEN32, $BLOCK_NUM, 2

    beq $T0, $T1, ctr32_encrypt_blocks_256
    beq $T0, $T2, ctr32_encrypt_blocks_192
    beq $T0, $T3, ctr32_encrypt_blocks_128

1:
    ret
.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_128:
    # Load all 11 round keys to v1-v11 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0

    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesef_vs $V24, $V11]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128
___
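# A note on the loop shape above; the 192- and 256-bit variants below follow
# the same pattern. The first pass jumps straight to 2: because v16 already
# holds counter blocks from init_aes_ctr_input; later passes enter at 1:,
# where the masked vadd_vx bumps only the counter words by $CTR, the number
# of blocks handled in the previous pass (VL/4, hence the srli). The
# vaesz.vs/vaesem.vs/vaesef.vs forms take the round key from element group 0
# of the key register and apply it to every element group of v24, so one
# 128-bit round key covers all blocks in flight.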
$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_192:
    # Load all 13 round keys to v1-v13 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0

    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesef_vs $V24, $V13]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_256:
    # Load all 15 round keys to v1-v15 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V14, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V15, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0

    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesem_vs $V24, $V13]}
    @{[vaesem_vs $V24, $V14]}
    @{[vaesef_vs $V24, $V15]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256
___
}

print $code;

close STDOUT or die "error closing STDOUT: $!";
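# Illustrative (hypothetical) C-side call of the generated routine; the real
# dispatch lives in OpenSSL's CTR glue code, which also handles the 2^32
# counter wrap, since only the low 32 bits of the big-endian counter word are
# incremented here:
#
#   /* key schedule prepared beforehand, e.g. with AES_set_encrypt_key() */
#   rv64i_zvkb_zvkned_ctr32_encrypt_blocks(in, out, blocks, &key, ivec);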