#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;
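
# riscv.pm (found via the lib paths above) supplies the @{[...]}-interpolated
# instruction helpers used below: clmul/clmulh (Zbc), rev8 (Zbb/Zbkb),
# brev8 (Zbkb), and the pure-RV64I fallbacks brev8_rv64i and sd_rev8_rv64i.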
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
open STDOUT, ">", $output or die "can't open $output: $!" if $output;
my $code=<<___;
.text
___
################################################################################
# void gcm_init_rv64i_zbc(u128 Htable[16], const u64 H[2]);
# void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 H[2]);
# void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 H[2]);
#
# input:  H: 128-bit H - secret parameter E(K, 0^128)
# output: Htable: preprocessed key data for gcm_gmult_rv64i_zbc* and
#                 gcm_ghash_rv64i_zbc*
#
# All callers of this function revert the byte order unconditionally
# on little-endian machines, so we need to revert the byte order back
# here. Additionally, we reverse the bits of each byte.
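#
# Worked example (illustration only, not generated code): the byte 0x01
# (bit 0 set) becomes 0x80 after the per-byte bit reversal, i.e. bit i of
# each byte moves to bit 7-i; the rev8/sd_rev8_rv64i step then mirrors the
# byte order within each 64-bit half.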
{
my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");
$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc
.type gcm_init_rv64i_zbc,\@function
gcm_init_rv64i_zbc:
    ld      $VAL0,0($H)
    ld      $VAL1,8($H)
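    # Plain Zbc path: rev8 is unavailable, so bit-reverse each byte with
    # the RV64I fallback and store each half with reversed byte order.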
    @{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
    @{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
    @{[sd_rev8_rv64i $VAL0, $Htable, 0, $TMP0]}
    @{[sd_rev8_rv64i $VAL1, $Htable, 8, $TMP0]}
    ret
.size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc
___
}

{
my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");
$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc__zbb
.type gcm_init_rv64i_zbc__zbb,\@function
gcm_init_rv64i_zbc__zbb:
    ld      $VAL0,0($H)
    ld      $VAL1,8($H)
    @{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
    @{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
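    # Zbb provides rev8 for the byte swap; the bit reversal above still
    # needs the RV64I fallback.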
    @{[rev8 $VAL0, $VAL0]}
    @{[rev8 $VAL1, $VAL1]}
    sd      $VAL0,0($Htable)
    sd      $VAL1,8($Htable)
    ret
.size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb
___
}

{
my ($Htable,$H,$TMP0,$TMP1) = ("a0","a1","t0","t1");
$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc__zbkb
.type gcm_init_rv64i_zbc__zbkb,\@function
gcm_init_rv64i_zbc__zbkb:
    ld      $TMP0,0($H)
    ld      $TMP1,8($H)
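    # Zbkb provides both brev8 and rev8, so the bit reversal and the byte
    # swap take a single instruction per 64-bit half.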
    @{[brev8 $TMP0, $TMP0]}
    @{[brev8 $TMP1, $TMP1]}
    @{[rev8 $TMP0, $TMP0]}
    @{[rev8 $TMP1, $TMP1]}
    sd      $TMP0,0($Htable)
    sd      $TMP1,8($Htable)
    ret
.size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb
___
}

################################################################################
# void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
# void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
#
# input:  Xi: current hash value
#         Htable: copy of H
# output: Xi: next hash value Xi
#
# Compute GMULT (Xi*H mod f) using the Zbc (clmul) and Zbb (basic bit
# manipulation) extensions. Using the no-Karatsuba approach and clmul for
# the final reduction minimizes the instruction count. Hardware with clmul
# latencies higher than 2 cycles may see a performance improvement from
# Karatsuba; with latencies higher than 6 cycles, additionally converting
# the reduction to shift-and-xor may also pay off. For a full discussion
# of these estimates, see
# https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
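#
# Sketch of the math (illustration only, not generated code): with
# Xi = x1:x0 and H = y1:y0 as 64-bit halves, the multiplication step
# forms the 256-bit carry-less product
#   z3:z2:z1:z0 = (x1:x0) x (y1:y0)
# and the reduction step folds the high half back in with the constant
# P = 0x87, since x^128 = x^7 + x^2 + x + 1 (mod f):
#   z2:z1 ^= z3 x P
#   x1:x0  = (z1:z0) ^ (z2 x P)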
{
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
$code .= <<___;
.p2align 3
.globl gcm_gmult_rv64i_zbc
.type gcm_gmult_rv64i_zbc,\@function
gcm_gmult_rv64i_zbc:
    # Load Xi and bit-reverse it
    ld      $x0, 0($Xi)
    ld      $x1, 8($Xi)
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}

    # Load the key (already bit-reversed)
    ld      $y0, 0($Htable)
    ld      $y1, 8($Htable)

    # Load the reduction constant
    la      $polymod, Lpolymod
    lbu     $polymod, 0($polymod)

    # Multiplication (without Karatsuba)
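    # Schoolbook 128x128->256 carry-less multiply into $z3:$z2:$z1:$z0:
    #   $z3 = hi($x1 x $y1)
    #   $z2 = lo($x1 x $y1) ^ hi($x0 x $y1) ^ hi($x1 x $y0)
    #   $z1 = lo($x0 x $y1) ^ lo($x1 x $y0) ^ hi($x0 x $y0)
    #   $z0 = lo($x0 x $y0)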
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor     $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor     $z1, $z1, $t1

    # Reduction with clmul
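    # Fold the high half back into the low half with the reduction
    # constant P: $z2:$z1 ^= $z3 x P, then $x1:$x0 = ($z1:$z0) ^ ($z2 x P)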
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor     $x1, $z1, $t1
    xor     $x0, $z0, $t0

    # Bit-reverse Xi back and store it
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    sd      $x0, 0($Xi)
    sd      $x1, 8($Xi)
    ret
.size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc
___
}

{
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
$code .= <<___;
.p2align 3
.globl gcm_gmult_rv64i_zbc__zbkb
.type gcm_gmult_rv64i_zbc__zbkb,\@function
gcm_gmult_rv64i_zbc__zbkb:
    # Load Xi and bit-reverse it
    ld      $x0, 0($Xi)
    ld      $x1, 8($Xi)
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}

    # Load the key (already bit-reversed)
    ld      $y0, 0($Htable)
    ld      $y1, 8($Htable)

    # Load the reduction constant
    la      $polymod, Lpolymod
    lbu     $polymod, 0($polymod)

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor     $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor     $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor     $x1, $z1, $t1
    xor     $x0, $z0, $t0

    # Bit-reverse Xi back and store it
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    sd      $x0, 0($Xi)
    sd      $x1, 8($Xi)
    ret
.size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb
___
}

################################################################################
# void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
#                          const u8 *inp, size_t len);
# void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
#                                const u8 *inp, size_t len);
#
# input:  Xi: current hash value
#         Htable: copy of H
#         inp: pointer to input data
#         len: length of input data in bytes (multiple of block size)
# output: Xi: Xi+1 (next hash value Xi)
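#
# Each 16-byte block is XORed into the running hash before the multiply,
# i.e. the loop below computes Xi = (Xi ^ block) x H mod f per block.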
{
my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
$code .= <<___;
.p2align 3
.globl gcm_ghash_rv64i_zbc
.type gcm_ghash_rv64i_zbc,\@function
gcm_ghash_rv64i_zbc:
    # Load Xi and bit-reverse it
    ld      $x0, 0($Xi)
    ld      $x1, 8($Xi)
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}

    # Load the key (already bit-reversed)
    ld      $y0, 0($Htable)
    ld      $y1, 8($Htable)

    # Load the reduction constant
    la      $polymod, Lpolymod
    lbu     $polymod, 0($polymod)

Lstep:
    # Load the input data, bit-reverse them, and XOR them with Xi
    ld      $t0, 0($inp)
    ld      $t1, 8($inp)
    add     $inp, $inp, 16
    add     $len, $len, -16
    @{[brev8_rv64i $t0, $z0, $z1, $z2]}
    @{[brev8_rv64i $t1, $z0, $z1, $z2]}
    xor     $x0, $x0, $t0
    xor     $x1, $x1, $t1

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor     $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor     $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor     $x1, $z1, $t1
    xor     $x0, $z0, $t0

    # Iterate over all blocks
    bnez    $len, Lstep

    # Bit-reverse final Xi back and store it
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    sd      $x0, 0($Xi)
    sd      $x1, 8($Xi)
    ret
.size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc
___
}

{
my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
$code .= <<___;
.p2align 3
.globl gcm_ghash_rv64i_zbc__zbkb
.type gcm_ghash_rv64i_zbc__zbkb,\@function
gcm_ghash_rv64i_zbc__zbkb:
    # Load Xi and bit-reverse it
    ld      $x0, 0($Xi)
    ld      $x1, 8($Xi)
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}

    # Load the key (already bit-reversed)
    ld      $y0, 0($Htable)
    ld      $y1, 8($Htable)

    # Load the reduction constant
    la      $polymod, Lpolymod
    lbu     $polymod, 0($polymod)
Lstep_zbkb:
    # Load the input data, bit-reverse them, and XOR them with Xi
    ld      $t0, 0($inp)
    ld      $t1, 8($inp)
    add     $inp, $inp, 16
    add     $len, $len, -16
    @{[brev8 $t0, $t0]}
    @{[brev8 $t1, $t1]}
    xor     $x0, $x0, $t0
    xor     $x1, $x1, $t1

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor     $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor     $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor     $z2, $z2, $t1
    xor     $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor     $x1, $z1, $t1
    xor     $x0, $z0, $t0

    # Iterate over all blocks
    bnez    $len, Lstep_zbkb
    # Bit-reverse final Xi back and store it
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    sd      $x0, 0($Xi)
    sd      $x1, 8($Xi)
    ret
.size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb
___
}

$code .= <<___;
.p2align 3
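# Masks for the RV64I per-byte bit-reversal fallback (brev8_rv64i):
# swap adjacent bits, then 2-bit pairs, then nibbles within each byte.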
Lbrev8_const:
    .dword  0xAAAAAAAAAAAAAAAA
    .dword  0xCCCCCCCCCCCCCCCC
    .dword  0xF0F0F0F0F0F0F0F0
.size Lbrev8_const,.-Lbrev8_const
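# GHASH reduction constant: the low coefficients x^7 + x^2 + x + 1 of
# the polynomial f = x^128 + x^7 + x^2 + x + 1.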
Lpolymod:
    .byte 0x87
.size Lpolymod,.-Lpolymod
___

print $code;

close STDOUT or die "error closing STDOUT: $!";