riscv: GCM: Provide a Zvkg-based implementation

The upcoming RISC-V vector crypto extensions feature
a Zvkg extension that provides the vghsh.vv and vgmul.vv
instructions. This patch provides an implementation that
utilizes this extension if available.

Tested on QEMU; no regressions observed.

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Hugo Landau <hlandau@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21923)
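
For reference, the core operation that vghsh.vv accelerates is folding one
128-bit block into the running hash: Xi <- (Xi XOR inp) * H in GF(2^128)
with the GHASH reduction polynomial (NIST SP 800-38D, Algorithm 1). Below is
a minimal scalar C sketch of that operation; the names and the two-word block
representation are illustrative and not part of the patch:

    #include <stdint.h>

    /* One 128-bit GHASH block, MSB-first as in NIST SP 800-38D. */
    typedef struct { uint64_t hi, lo; } block128;

    /* Multiply X by H in GF(2^128) with the GHASH reduction polynomial
     * x^128 + x^7 + x^2 + x + 1 (reflected bit order). */
    static block128 gf128_mul(block128 X, block128 H)
    {
        block128 Z = { 0, 0 };
        block128 V = H;

        for (int i = 0; i < 128; i++) {
            /* Test bit i of X, MSB first. */
            uint64_t bit = (i < 64) ? (X.hi >> (63 - i)) & 1
                                    : (X.lo >> (127 - i)) & 1;
            if (bit) {
                Z.hi ^= V.hi;
                Z.lo ^= V.lo;
            }
            /* V = V >> 1, reduced by R = 11100001 || 0^120 on carry-out. */
            uint64_t carry = V.lo & 1;
            V.lo = (V.lo >> 1) | (V.hi << 63);
            V.hi >>= 1;
            if (carry)
                V.hi ^= 0xE100000000000000ULL;
        }
        return Z;
    }

    /* One vghsh.vv step: Xi = (Xi ^ inp) * H, folding in one block. */
    static void ghash_step(block128 *Xi, block128 H, block128 inp)
    {
        Xi->hi ^= inp.hi;
        Xi->lo ^= inp.lo;
        *Xi = gf128_mul(*Xi, H);
    }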

+ 166 - 0
crypto/modes/asm/ghash-riscv64-zvkg.pl

@@ -0,0 +1,166 @@
+#! /usr/bin/env perl
+# This file is dual-licensed, meaning that you can use it under your
+# choice of either of the following two licenses:
+#
+# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You can obtain
+# a copy in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+#
+# or
+#
+# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# The generated code of this file depends on the following RISC-V extensions:
+# - RV64I
+# - RISC-V vector ('V') with VLEN >= 128
+# - RISC-V vector crypto GHASH extension ('Zvkg')
+
+use strict;
+use warnings;
+
+use FindBin qw($Bin);
+use lib "$Bin";
+use lib "$Bin/../../perlasm";
+use riscv;
+
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
+
+$output and open STDOUT,">$output";
+
+my $code=<<___;
+.text
+___
+
+################################################################################
+# void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 H[2]);
+# void gcm_init_rv64i_zvkg_zvbb(u128 Htable[16], const u64 H[2]);
+#
+# input: H: 128-bit H - secret parameter E(K, 0^128)
+# output: Htable: Copy of secret parameter (in normalized byte order)
+#
+# All callers of this function revert the byte order unconditionally
+# on little-endian machines, so we revert it back here.
+{
+my ($Htable,$H,$VAL0,$VAL1,$TMP0) = ("a0","a1","a2","a3","t0");
+
+$code .= <<___;
+.p2align 3
+.globl gcm_init_rv64i_zvkg
+.type gcm_init_rv64i_zvkg,\@function
+gcm_init_rv64i_zvkg:
+    ld      $VAL0, 0($H)
+    ld      $VAL1, 8($H)
+    @{[sd_rev8_rv64i $VAL0, $Htable, 0, $TMP0]}
+    @{[sd_rev8_rv64i $VAL1, $Htable, 8, $TMP0]}
+    ret
+.size gcm_init_rv64i_zvkg,.-gcm_init_rv64i_zvkg
+___
+}
+
+{
+my ($Htable,$H,$V0) = ("a0","a1","v0");
+
+$code .= <<___;
+.p2align 3
+.globl gcm_init_rv64i_zvkg_zvbb
+.type gcm_init_rv64i_zvkg_zvbb,\@function
+gcm_init_rv64i_zvkg_zvbb:
+    @{[vsetivli__x0_2_e64_m1_tu_mu]} # vsetivli x0, 2, e64, m1, tu, mu
+    @{[vle64_v $V0, $H]}             # vle64.v v0, (a1)
+    @{[vrev8_v $V0, $V0]}            # vrev8.v v0, v0
+    @{[vse64_v $V0, $Htable]}        # vse64.v v0, (a0)
+    ret
+.size gcm_init_rv64i_zvkg_zvbb,.-gcm_init_rv64i_zvkg_zvbb
+___
+}
+
+################################################################################
+# void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);
+#
+# input: Xi: current hash value
+#        Htable: copy of H
+# output: Xi: next hash value
+{
+my ($Xi,$Htable) = ("a0","a1");
+my ($VD,$VS2) = ("v1","v2");
+
+$code .= <<___;
+.p2align 3
+.globl gcm_gmult_rv64i_zvkg
+.type gcm_gmult_rv64i_zvkg,\@function
+gcm_gmult_rv64i_zvkg:
+    @{[vsetivli__x0_4_e32_m1_tu_mu]}
+    @{[vle32_v $VS2, $Htable]}
+    @{[vle32_v $VD, $Xi]}
+    @{[vgmul_vv $VD, $VS2]}
+    @{[vse32_v $VD, $Xi]}
+    ret
+.size gcm_gmult_rv64i_zvkg,.-gcm_gmult_rv64i_zvkg
+___
+}
+
+################################################################################
+# void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
+#                           const u8 *inp, size_t len);
+#
+# input: Xi: current hash value
+#        Htable: copy of H
+#        inp: pointer to input data
+#        len: length of input data in bytes (multiple of the block size)
+# output: Xi: hash value after all input blocks have been processed
+{
+my ($Xi,$Htable,$inp,$len) = ("a0","a1","a2","a3");
+my ($vXi,$vH,$vinp,$Vzero) = ("v1","v2","v3","v4");
+
+$code .= <<___;
+.p2align 3
+.globl gcm_ghash_rv64i_zvkg
+.type gcm_ghash_rv64i_zvkg,\@function
+gcm_ghash_rv64i_zvkg:
+    @{[vsetivli__x0_4_e32_m1_tu_mu]}
+    @{[vle32_v $vH, $Htable]}
+    @{[vle32_v $vXi, $Xi]}
+
+Lstep:
+    @{[vle32_v $vinp, $inp]}
+    addi $inp, $inp, 16
+    addi $len, $len, -16
+    @{[vghsh_vv $vXi, $vH, $vinp]}
+    bnez $len, Lstep
+
+    @{[vse32_v $vXi, $Xi]}
+    ret
+
+.size gcm_ghash_rv64i_zvkg,.-gcm_ghash_rv64i_zvkg
+___
+}
+
+print $code;
+
+close STDOUT or die "error closing STDOUT: $!";
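
A hedged usage sketch of how the generated entry points compose, using the
prototypes declared in the gcm128.c hunk further down. The wrapper function
and the u128 layout shown here are assumptions for illustration, not code
from the patch:

    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t u64;
    typedef unsigned char u8;
    typedef struct { u64 hi, lo; } u128;  /* assumed OpenSSL modes layout */

    void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 H[2]);
    void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
                              const u8 *inp, size_t len);

    /* GHASH a message; len must be a multiple of 16 bytes. */
    static void ghash_message(u64 Xi[2], const u64 H[2],
                              const u8 *msg, size_t len)
    {
        u128 Htable[16];                /* this implementation fills only Htable[0] */
        gcm_init_rv64i_zvkg(Htable, H); /* store H in normalized byte order */
        gcm_ghash_rv64i_zvkg(Xi, Htable, msg, len);
    }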

+ 2 - 1
crypto/modes/build.info

@@ -43,7 +43,7 @@ IF[{- !$disabled{asm} -}]
   $MODESASM_c64xplus=ghash-c64xplus.s
   $MODESDEF_c64xplus=GHASH_ASM
 
-  $MODESASM_riscv64=ghash-riscv64.s ghash-riscv64-zvbb-zvbc.s
+  $MODESASM_riscv64=ghash-riscv64.s ghash-riscv64-zvbb-zvbc.s ghash-riscv64-zvkg.s
   $MODESDEF_riscv64=GHASH_ASM
 
   # Now that we have defined all the arch specific variables, use the
@@ -92,3 +92,4 @@ INCLUDE[ghash-s390x.o]=..
 GENERATE[ghash-c64xplus.S]=asm/ghash-c64xplus.pl
 GENERATE[ghash-riscv64.s]=asm/ghash-riscv64.pl
 GENERATE[ghash-riscv64-zvbb-zvbc.s]=asm/ghash-riscv64-zvbb-zvbc.pl
+GENERATE[ghash-riscv64-zvkg.s]=asm/ghash-riscv64-zvkg.pl

+ 14 - 1
crypto/modes/gcm128.c

@@ -418,6 +418,12 @@ void gcm_init_rv64i_zvbb_zvbc(u128 Htable[16], const u64 Xi[2]);
 void gcm_gmult_rv64i_zvbb_zvbc(u64 Xi[2], const u128 Htable[16]);
 void gcm_ghash_rv64i_zvbb_zvbc(u64 Xi[2], const u128 Htable[16],
                                const u8 *inp, size_t len);
+/* Zvkg (vector crypto with vgmul.vv and vghsh.vv). */
+void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 Xi[2]);
+void gcm_init_rv64i_zvkg_zvbb(u128 Htable[16], const u64 Xi[2]);
+void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);
+void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
+                          const u8 *inp, size_t len);
 # endif
 #endif
 
@@ -517,7 +523,14 @@ static void gcm_get_funcs(struct gcm_funcs_st *ctx)
     ctx->gmult = gcm_gmult_4bit;
     ctx->ghash = gcm_ghash_4bit;
 
-    if (RISCV_HAS_ZVBB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) {
+    if (RISCV_HAS_ZVKG() && riscv_vlen() >= 128) {
+        if (RISCV_HAS_ZVBB())
+            ctx->ginit = gcm_init_rv64i_zvkg_zvbb;
+        else
+            ctx->ginit = gcm_init_rv64i_zvkg;
+        ctx->gmult = gcm_gmult_rv64i_zvkg;
+        ctx->ghash = gcm_ghash_rv64i_zvkg;
+    } else if (RISCV_HAS_ZVBB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) {
         ctx->ginit = gcm_init_rv64i_zvbb_zvbc;
         ctx->gmult = gcm_gmult_rv64i_zvbb_zvbc;
         ctx->ghash = gcm_ghash_rv64i_zvbb_zvbc;
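
The selection order in this hunk can be summarized as a small policy
function; a sketch in which the arguments stand in for the RISCV_HAS_ZVKG(),
RISCV_HAS_ZVBB(), RISCV_HAS_ZVBC() and riscv_vlen() probes:

    #include <stddef.h>

    enum ghash_impl { GHASH_4BIT, GHASH_ZVBB_ZVBC, GHASH_ZVKG };

    /* Mirrors gcm_get_funcs(): the dedicated GHASH extension wins, the
     * carryless-multiply path comes next, generic 4-bit tables are the
     * fallback. */
    static enum ghash_impl pick_ghash(int zvkg, int zvbb, int zvbc, size_t vlen)
    {
        if (zvkg && vlen >= 128)
            return GHASH_ZVKG;        /* ginit additionally uses vrev8.v if Zvbb */
        if (zvbb && zvbc && vlen >= 128)
            return GHASH_ZVBB_ZVBC;
        return GHASH_4BIT;
    }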

+ 40 - 0
crypto/perlasm/riscv.pm

@@ -281,6 +281,14 @@ sub rev8 {
 
 # Vector instructions
 
+sub vle32_v {
+    # vle32.v vd, (rs1)
+    my $template = 0b0000001_00000_00000_110_00000_0000111;
+    my $vd = read_vreg shift;
+    my $rs1 = read_reg shift;
+    return ".word ".($template | ($rs1 << 15) | ($vd << 7));
+}
+
 sub vle64_v {
     # vle64.v vd, (rs1)
     my $template = 0b0000001_00000_00000_111_00000_0000111;
@@ -332,6 +340,14 @@ sub vor_vv_v0t {
     return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
 }
 
+sub vse32_v {
+    # vse32.v vd, (rs1)
+    my $template = 0b0000001_00000_00000_110_00000_0100111;
+    my $vd = read_vreg shift;
+    my $rs1 = read_reg shift;
+    return ".word ".($template | ($rs1 << 15) | ($vd << 7));
+}
+
 sub vse64_v {
     # vse64.v vd, (rs1)
     my $template = 0b0000001_00000_00000_111_00000_0100111;
@@ -345,6 +361,11 @@ sub vsetivli__x0_2_e64_m1_tu_mu {
     return ".word 0xc1817057";
 }
 
+sub vsetivli__x0_4_e32_m1_tu_mu {
+    # vsetivli x0, 4, e32, m1, tu, mu
+    return ".word 0xc1027057";
+}
+
 sub vslidedown_vi {
     # vslidedown.vi vd, vs2, uimm
     my $template = 0b0011111_00000_00000_011_00000_1010111;
@@ -458,4 +479,23 @@ sub vclmul_vx {
     return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7));
 }
 
+## Zvkg instructions
+
+sub vghsh_vv {
+    # vghsh.vv vd, vs2, vs1
+    my $template = 0b1011001_00000_00000_010_00000_1110111;
+    my $vd = read_vreg shift;
+    my $vs2 = read_vreg shift;
+    my $vs1 = read_vreg shift;
+    return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
+}
+
+sub vgmul_vv {
+    # vgmul.vv vd, vs2
+    my $template = 0b1010001_00000_10001_010_00000_1110111;
+    my $vd = read_vreg shift;
+    my $vs2 = read_vreg shift;
+    return ".word ".($template | ($vs2 << 20) | ($vd << 7));
+}
+
 1;
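
The riscv.pm helpers build each instruction by OR-ing register indices into a
fixed 32-bit template. A C transcription of the vghsh_vv helper above, as a
sketch; the printed word is what the .word directive would carry:

    #include <stdint.h>
    #include <stdio.h>

    /* vghsh.vv vd, vs2, vs1: funct6=101100, vm=1, funct3=OPMVV (010),
     * opcode=1110111, matching the template in vghsh_vv above. */
    static uint32_t enc_vghsh_vv(uint32_t vd, uint32_t vs2, uint32_t vs1)
    {
        const uint32_t template = 0xb2002077;
        /* = 0b1011001_00000_00000_010_00000_1110111 */
        return template | (vs2 << 20) | (vs1 << 15) | (vd << 7);
    }

    int main(void)
    {
        /* vghsh.vv v1, v2, v3 -- the combination gcm_ghash_rv64i_zvkg emits */
        printf(".word 0x%08x\n", (unsigned)enc_vghsh_vv(1, 2, 3));
        return 0;
    }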

+ 1 - 0
include/crypto/riscv_arch.def

@@ -35,6 +35,7 @@ RISCV_DEFINE_CAP(ZKT, 0, 13)
 RISCV_DEFINE_CAP(V, 0, 14)
 RISCV_DEFINE_CAP(ZVBB, 0, 15)
 RISCV_DEFINE_CAP(ZVBC, 0, 16)
+RISCV_DEFINE_CAP(ZVKG, 0, 17)
 
 /*
  * In the future ...
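
riscv_arch.def is consumed as an X-macro list, so the new ZVKG row turns into
a RISCV_HAS_ZVKG() feature test over capability word 0, bit 17. A hedged
sketch of that pattern; OpenSSL's actual include/crypto/riscv_arch.h follows
this shape but differs in detail (e.g. how the capability words are
populated):

    #include <stdint.h>

    static uint32_t riscv_caps[2]; /* would be filled from the host ISA probe */

    /* Each RISCV_DEFINE_CAP(NAME, index, bit) row expands to a feature test. */
    #define RISCV_DEFINE_CAP(NAME, index, bit) \
        static inline int RISCV_HAS_##NAME(void) \
        { return (riscv_caps[(index)] >> (bit)) & 1; }

    #include "riscv_arch.def"
    #undef RISCV_DEFINE_CAP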