12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089 |
- #! /usr/bin/env perl
- # This file is dual-licensed, meaning that you can use it under your
- # choice of either of the following two licenses:
- #
- # Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
- #
- # Licensed under the Apache License 2.0 (the "License"). You can obtain
- # a copy in the file LICENSE in the source distribution or at
- # https://www.openssl.org/source/license.html
- #
- # or
- #
- # Copyright (c) 2022, Hongren (Zenithal) Zheng <i@zenithal.me>
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # 1. Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Command-line handling:
#   $output  - the last argument, taken only when it looks like a file name
#              (it ends in ".<extension>")
#   $flavour - the first remaining argument, taken only when it does not look
#              like a file name (it contains no dot)
# Either one stays undefined when no matching argument is present.
$output = @ARGV && $ARGV[-1] =~ m|\.\w+$| ? pop : undef;
$flavour = @ARGV && $ARGV[0] !~ m|\.| ? shift : undef;

# Send everything printed to STDOUT into the requested file.  The
# three-argument form keeps characters in the file name from being read as an
# open mode, and the result is checked so a path that cannot be written is
# reported here instead of the output being silently lost.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
- ################################################################################
- # Utility functions to help with keeping track of which registers to stack/
- # unstack when entering / exiting routines.
- ################################################################################
{
    # Registers a routine has to preserve for its caller.
    my @callee_saved = map("x$_",(2,8,9,18..27));
    # Registers a routine may overwrite without saving them.
    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
    # Callee-saved registers handed out by use_reg() since the last
    # clear_regs(); each appears once, in the order it was first requested.
    my @must_save;

    # Claims register $reg (spelled "x<N>") for the generated code and returns
    # the name unchanged.  A callee-saved register is remembered so that
    # save_regs()/load_regs() spill and reload it.  Dies for any name found in
    # neither list.  Names are compared as plain strings, so regex
    # metacharacters in $reg cannot match a real register by accident.
    sub use_reg {
        my $reg = shift;
        if (grep($_ eq $reg, @callee_saved)) {
            # Record each register once; a repeated request must not grow
            # the frame or store the same register twice.
            push(@must_save, $reg) unless grep($_ eq $reg, @must_save);
        } elsif (!grep($_ eq $reg, @caller_saved)) {
            # Register is not usable!
            die("Unusable register ".$reg);
        }
        return $reg;
    }

    # Takes register numbers and returns the matching "x<N>" names, claiming
    # each one through use_reg().
    sub use_regs {
        return map(use_reg("x$_"), @_);
    }

    # Returns (bytes occupied by the saved registers, bytes to reserve on the
    # stack).  Every register gets an 8-byte slot and the reservation is
    # rounded up to a multiple of 16.
    sub _frame_sizes {
        my $used = scalar(@must_save) * 8;
        my $reserved = $used % 16 ? $used + 8 : $used;
        return ($used, $reserved);
    }

    # Returns the prologue text: reserve the frame, then store every
    # remembered register at descending offsets.
    sub save_regs {
        my ($stack_offset, $stack_reservation) = _frame_sizes();
        my $ret = " addi sp,sp,-$stack_reservation\n";
        foreach my $reg (@must_save) {
            $stack_offset -= 8;
            $ret .= " sw $reg,$stack_offset(sp)\n";
        }
        return $ret;
    }

    # Returns the epilogue text: reload every remembered register from the
    # slot save_regs() used for it, then release the frame.
    sub load_regs {
        my ($stack_offset, $stack_reservation) = _frame_sizes();
        my $ret = '';
        foreach my $reg (@must_save) {
            $stack_offset -= 8;
            $ret .= " lw $reg,$stack_offset(sp)\n";
        }
        $ret .= " addi sp,sp,$stack_reservation\n";
        return $ret;
    }

    # Forgets all remembered registers, ready for the next routine.
    sub clear_regs {
        @must_save = ();
    }
}
- ################################################################################
- # util for encoding scalar crypto extension instructions
- ################################################################################
# Architectural register names, indexed by register number
my @regs = map("x$_",(0..31));
# ABI mnemonics in register-number order (x0 = zero ... x31 = t6)
my @regaliases = ('zero','ra','sp','gp','tp','t0','t1','t2','s0','s1',
    map("a$_",(0..7)),
    map("s$_",(2..11)),
    map("t$_",(3..6)));
# Maps every accepted spelling to its canonical "x<N>" name
my %reglookup;
@reglookup{@regs} = @regs;
@reglookup{@regaliases} = @regs;
# x8 has a second ABI mnemonic
$reglookup{'fp'} = 'x8';

# Takes a register name, either "x<N>" or an ABI alias (case-insensitive), and
# converts it to a register index from 0 to 31.  Dies on an unknown name.
sub read_reg {
    my $reg = lc shift;
    if (!exists($reglookup{$reg})) {
        die("Unknown register ".$reg);
    }
    my $regstr = $reglookup{$reg};
    if (!($regstr =~ /^x([0-9]+)$/)) {
        die("Could not process register ".$reg);
    }
    return $1;
}
sub aes32dsi {
    # Emits "aes32dsi rd, rs1, rs2, bs" (RV32) as a raw .word directive.
    # Bit fields from the most significant end:
    #   bs(2) | 10101 | rs2(5) | rs1(5) | 000 | rd(5) | 0110011
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);
    my $word = 0b00_10101_00000_00000_000_00000_0110011;
    $word |= $bs << 30;
    $word |= $rs2 << 20;
    $word |= $rs1 << 15;
    $word |= $rd << 7;
    return ".word ".$word;
}
sub aes32dsmi {
    # Emits "aes32dsmi rd, rs1, rs2, bs" (RV32) as a raw .word directive.
    # Bit fields from the most significant end:
    #   bs(2) | 10111 | rs2(5) | rs1(5) | 000 | rd(5) | 0110011
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);
    my $word = 0b00_10111_00000_00000_000_00000_0110011;
    $word |= $bs << 30;
    $word |= $rs2 << 20;
    $word |= $rs1 << 15;
    $word |= $rd << 7;
    return ".word ".$word;
}
sub aes32esi {
    # Emits "aes32esi rd, rs1, rs2, bs" (RV32) as a raw .word directive.
    # Bit fields from the most significant end:
    #   bs(2) | 10001 | rs2(5) | rs1(5) | 000 | rd(5) | 0110011
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);
    my $word = 0b00_10001_00000_00000_000_00000_0110011;
    $word |= $bs << 30;
    $word |= $rs2 << 20;
    $word |= $rs1 << 15;
    $word |= $rd << 7;
    return ".word ".$word;
}
sub aes32esmi {
    # Emits "aes32esmi rd, rs1, rs2, bs" (RV32) as a raw .word directive.
    # Bit fields from the most significant end:
    #   bs(2) | 10011 | rs2(5) | rs1(5) | 000 | rd(5) | 0110011
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);
    my $word = 0b00_10011_00000_00000_000_00000_0110011;
    $word |= $bs << 30;
    $word |= $rs2 << 20;
    $word |= $rs1 << 15;
    $word |= $rd << 7;
    return ".word ".$word;
}
sub rori {
    # Encoding for rori rd, rs1, shamt, using the layout with a 5-bit shift
    # amount (the template below has a 7-bit fixed prefix, as on RV32)
    # XXXXXXX_shamt_ rs1 _XXX_ rd _XXXXXXX
    #
    # Returns the instruction as a ".word <value>" directive.  Dies on an
    # unknown register or on a shift amount outside 0..31.
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg(shift);
    my $rs1 = read_reg(shift);
    my $shamt = shift;
    # The shift amount occupies bits 24:20 only.  A larger value would be
    # OR-ed into the fixed bits above it and silently encode something else.
    if (!defined($shamt) || $shamt !~ /^[0-9]+\z/ || $shamt > 31) {
        die("Invalid rori shift amount ".(defined($shamt) ? $shamt : "undef"));
    }
    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}
- ################################################################################
- # Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
- ################################################################################
# AES state words at entry (called s0-s3 or y0-y3 elsewhere)
my ($Q0,$Q1,$Q2,$Q3) = use_regs(6, 7, 8, 9);
# Function arguments (x10-x12 are a0-a2 in the ABI):
# input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = use_regs(10, 11, 12);
# Round-key words at entry
my ($T0,$T1,$T2,$T3) = use_regs(13, 14, 15, 16);
# Loop counter
my $loopcntr = use_reg("x30");
- ################################################################################
- # Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
- ################################################################################
# Applies the instruction generator $inst four times, once per state word,
# each time accumulating into the same key column register:
#   $inst->($key, $key, $state[i], i)   for i = 0..3
# Arguments: $inst (code reference), $key, then the four state registers.
# Returns the four generated lines, each terminated by a newline.
sub outer {
    my ($inst, $key, @state) = @_;
    my $ret = '';
    foreach my $bs (0..3) {
        my @emitted = $inst->($key,$key,$state[$bs],$bs);
        $ret .= "@emitted\n";
    }
    return $ret;
}
sub aes32esmi4 {
    # Four aes32esmi steps; arguments are (key column, s0, s1, s2, s3)
    my @operands = @_;
    return outer(\&aes32esmi, @operands);
}
sub aes32esi4 {
    # Four aes32esi steps; arguments are (key column, s0, s1, s2, s3)
    my @operands = @_;
    return outer(\&aes32esi, @operands);
}
sub aes32dsmi4 {
    # Four aes32dsmi steps; arguments are (key column, s0, s1, s2, s3)
    my @operands = @_;
    return outer(\&aes32dsmi, @operands);
}
sub aes32dsi4 {
    # Four aes32dsi steps; arguments are (key column, s0, s1, s2, s3)
    my @operands = @_;
    return outer(\&aes32dsi, @operands);
}
- ################################################################################
- # void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
- # const AES_KEY *key);
- ################################################################################
# The routine is assembled from five pieces, in order: header, register
# spill, body, register reload, return.
my $code = join('',
    <<___,
.text
.balign 16
.globl rv32i_zkne_encrypt
.type rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___
    save_regs(),
    <<___,
# Load input to block cipher
lw $Q0,0($INP)
lw $Q1,4($INP)
lw $Q2,8($INP)
lw $Q3,12($INP)
# Load key
lw $T0,0($KEYP)
lw $T1,4($KEYP)
lw $T2,8($KEYP)
lw $T3,12($KEYP)
# Load number of rounds
lw $loopcntr,240($KEYP)
# initial transformation
xor $Q0,$Q0,$T0
xor $Q1,$Q1,$T1
xor $Q2,$Q2,$T2
xor $Q3,$Q3,$T3
# The main loop only executes the first N-2 rounds, each loop consumes two rounds
add $loopcntr,$loopcntr,-2
srli $loopcntr,$loopcntr,1
1:
# Grab next key in schedule
add $KEYP,$KEYP,16
lw $T0,0($KEYP)
lw $T1,4($KEYP)
lw $T2,8($KEYP)
lw $T3,12($KEYP)
@{[aes32esmi4($T0,$Q0,$Q1,$Q2,$Q3)]}
@{[aes32esmi4($T1,$Q1,$Q2,$Q3,$Q0)]}
@{[aes32esmi4($T2,$Q2,$Q3,$Q0,$Q1)]}
@{[aes32esmi4($T3,$Q3,$Q0,$Q1,$Q2)]}
# now T0~T3 hold the new state
# Grab next key in schedule
add $KEYP,$KEYP,16
lw $Q0,0($KEYP)
lw $Q1,4($KEYP)
lw $Q2,8($KEYP)
lw $Q3,12($KEYP)
@{[aes32esmi4($Q0,$T0,$T1,$T2,$T3)]}
@{[aes32esmi4($Q1,$T1,$T2,$T3,$T0)]}
@{[aes32esmi4($Q2,$T2,$T3,$T0,$T1)]}
@{[aes32esmi4($Q3,$T3,$T0,$T1,$T2)]}
# now Q0~Q3 hold the new state
add $loopcntr,$loopcntr,-1
bgtz $loopcntr,1b
# final two rounds
# Grab next key in schedule
add $KEYP,$KEYP,16
lw $T0,0($KEYP)
lw $T1,4($KEYP)
lw $T2,8($KEYP)
lw $T3,12($KEYP)
@{[aes32esmi4($T0,$Q0,$Q1,$Q2,$Q3)]}
@{[aes32esmi4($T1,$Q1,$Q2,$Q3,$Q0)]}
@{[aes32esmi4($T2,$Q2,$Q3,$Q0,$Q1)]}
@{[aes32esmi4($T3,$Q3,$Q0,$Q1,$Q2)]}
# now T0~T3 hold the new state
# Grab next key in schedule
add $KEYP,$KEYP,16
lw $Q0,0($KEYP)
lw $Q1,4($KEYP)
lw $Q2,8($KEYP)
lw $Q3,12($KEYP)
# no mix column now
@{[aes32esi4($Q0,$T0,$T1,$T2,$T3)]}
@{[aes32esi4($Q1,$T1,$T2,$T3,$T0)]}
@{[aes32esi4($Q2,$T2,$T3,$T0,$T1)]}
@{[aes32esi4($Q3,$T3,$T0,$T1,$T2)]}
# now Q0~Q3 hold the new state
sw $Q0,0($OUTP)
sw $Q1,4($OUTP)
sw $Q2,8($OUTP)
sw $Q3,12($OUTP)
# Pop registers and return
___
    load_regs(),
    <<___);
ret
___
- ################################################################################
- # void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
- # const AES_KEY *key);
- ################################################################################
# Same five-piece assembly as the encrypt routine: header, register spill,
# body, register reload, return.  The key schedule is walked backwards.
$code .= join('',
    <<___,
.text
.balign 16
.globl rv32i_zknd_decrypt
.type rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___
    save_regs(),
    <<___,
# Load input to block cipher
lw $Q0,0($INP)
lw $Q1,4($INP)
lw $Q2,8($INP)
lw $Q3,12($INP)
# Load number of rounds
lw $loopcntr,240($KEYP)
# Load the last key
# use T0 as temporary now
slli $T0,$loopcntr,4
add $KEYP,$KEYP,$T0
# Load key
lw $T0,0($KEYP)
lw $T1,4($KEYP)
lw $T2,8($KEYP)
lw $T3,12($KEYP)
# initial transformation
xor $Q0,$Q0,$T0
xor $Q1,$Q1,$T1
xor $Q2,$Q2,$T2
xor $Q3,$Q3,$T3
# The main loop only executes the first N-2 rounds, each loop consumes two rounds
add $loopcntr,$loopcntr,-2
srli $loopcntr,$loopcntr,1
1:
# Grab next key in schedule
add $KEYP,$KEYP,-16
lw $T0,0($KEYP)
lw $T1,4($KEYP)
lw $T2,8($KEYP)
lw $T3,12($KEYP)
@{[aes32dsmi4($T0,$Q0,$Q3,$Q2,$Q1)]}
@{[aes32dsmi4($T1,$Q1,$Q0,$Q3,$Q2)]}
@{[aes32dsmi4($T2,$Q2,$Q1,$Q0,$Q3)]}
@{[aes32dsmi4($T3,$Q3,$Q2,$Q1,$Q0)]}
# now T0~T3 hold the new state
# Grab next key in schedule
add $KEYP,$KEYP,-16
lw $Q0,0($KEYP)
lw $Q1,4($KEYP)
lw $Q2,8($KEYP)
lw $Q3,12($KEYP)
@{[aes32dsmi4($Q0,$T0,$T3,$T2,$T1)]}
@{[aes32dsmi4($Q1,$T1,$T0,$T3,$T2)]}
@{[aes32dsmi4($Q2,$T2,$T1,$T0,$T3)]}
@{[aes32dsmi4($Q3,$T3,$T2,$T1,$T0)]}
# now Q0~Q3 hold the new state
add $loopcntr,$loopcntr,-1
bgtz $loopcntr,1b
# final two rounds
# Grab next key in schedule
add $KEYP,$KEYP,-16
lw $T0,0($KEYP)
lw $T1,4($KEYP)
lw $T2,8($KEYP)
lw $T3,12($KEYP)
@{[aes32dsmi4($T0,$Q0,$Q3,$Q2,$Q1)]}
@{[aes32dsmi4($T1,$Q1,$Q0,$Q3,$Q2)]}
@{[aes32dsmi4($T2,$Q2,$Q1,$Q0,$Q3)]}
@{[aes32dsmi4($T3,$Q3,$Q2,$Q1,$Q0)]}
# now T0~T3 hold the new state
# Grab next key in schedule
add $KEYP,$KEYP,-16
lw $Q0,0($KEYP)
lw $Q1,4($KEYP)
lw $Q2,8($KEYP)
lw $Q3,12($KEYP)
# no mix column now
@{[aes32dsi4($Q0,$T0,$T3,$T2,$T1)]}
@{[aes32dsi4($Q1,$T1,$T0,$T3,$T2)]}
@{[aes32dsi4($Q2,$T2,$T1,$T0,$T3)]}
@{[aes32dsi4($Q3,$T3,$T2,$T1,$T0)]}
# now Q0~Q3 hold the new state
sw $Q0,0($OUTP)
sw $Q1,4($OUTP)
sw $Q2,8($OUTP)
sw $Q3,12($OUTP)
# Pop registers and return
___
    load_regs(),
    <<___);
ret
___
# The routines emitted from here on start with an empty save list.
clear_regs();
- ################################################################################
- # Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
- ################################################################################
# Function arguments (x10-x12 are a0-a2 in the ABI):
# pointer to user key, number of bits in key, key pointer
my ($UKEY,$BITS,$KEYP) = use_regs(10, 11, 12);
# Temporaries
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = use_regs(13, 14, 15, 16, 17, 28, 29, 30, 31);
################################################################################
# utility functions for rv32i_zkne_set_encrypt_key
################################################################################
# Round constants: the powers of two up to 0x80, followed by 0x1b and 0x36
my @rcon = ((map { 1 << $_ } 0..7), 0x1b, 0x36);
# Applies the instruction generator $inst to each of the four bytes of $rs
# (byte select 0..3), accumulating every result into $rd:
#   $inst->($rd, $rd, $rs, i)   for i = 0..3
# Whether a mix step is included depends on the instruction passed in.
# Returns the four generated lines, each terminated by a newline.
sub sbox4 {
    my ($inst, $rd, $rs) = @_;
    my @lines;
    foreach my $bs (0..3) {
        my @emitted = $inst->($rd,$rd,$rs,$bs);
        push(@lines, "@emitted\n");
    }
    return join('', @lines);
}
sub fwdsbox4 {
    # sbox4 with aes32esi; arguments are (rd, rs)
    my @operands = @_;
    return sbox4(\&aes32esi, @operands);
}
# Generates the AES-128 encryption key expansion: copies the four user key
# words to the schedule, then emits ten rounds that each derive and store the
# next four words.  $zbkb selects a single rori for the rotate instead of the
# shift/shift/or sequence (which uses T5 as scratch).
sub ke128enc {
    my ($zbkb) = @_;

    # right rotate by 8: T4 = T3 rotated; identical in every round
    my $rotate;
    if ($zbkb) {
        $rotate = rori($T4,$T3,8) . "\n";
    } else {
        $rotate = "srli $T4,$T3,8\n"
                . "slli $T5,$T3,24\n"
                . "or $T4,$T4,$T5\n";
    }

    my $ret = <<___;
lw $T0,0($UKEY)
lw $T1,4($UKEY)
lw $T2,8($UKEY)
lw $T3,12($UKEY)
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___

    foreach my $rc (@rcon[0..9]) {
        $ret .= <<___;
# use T4 to store rcon
li $T4,$rc
# as xor is associative and commutative
# we fist xor T0 with RCON, then use T0 to
# xor the result of each SBOX result of T3
xor $T0,$T0,$T4
# use T4 to store rotated T3
___
        $ret .= $rotate;
        $ret .= <<___;
# update T0
@{[fwdsbox4($T0,$T4)]}
# update new T1~T3
xor $T1,$T1,$T0
xor $T2,$T2,$T1
xor $T3,$T3,$T2
add $KEYP,$KEYP,16
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___
    }
    return $ret;
}
# Generates the AES-192 encryption key expansion: copies the six user key
# words to the schedule, then emits eight rounds of 24 bytes each.  $zbkb
# selects a single rori for the rotate instead of the shift/shift/or sequence
# (which uses T7 as scratch).
sub ke192enc {
    my ($zbkb) = @_;

    # right rotate by 8: T6 = T5 rotated; identical in every round
    my $rotate;
    if ($zbkb) {
        $rotate = rori($T6,$T5,8) . "\n";
    } else {
        $rotate = "srli $T6,$T5,8\n"
                . "slli $T7,$T5,24\n"
                . "or $T6,$T6,$T7\n";
    }

    my $ret = <<___;
lw $T0,0($UKEY)
lw $T1,4($UKEY)
lw $T2,8($UKEY)
lw $T3,12($UKEY)
lw $T4,16($UKEY)
lw $T5,20($UKEY)
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
sw $T4,16($KEYP)
sw $T5,20($KEYP)
___

    foreach my $rnum (0..7) {
        # note that (8+1)*24 = 216, (12+1)*16 = 208
        # thus the last 8 bytes (T4/T5 of the final round) can be dropped
        my $final = ($rnum == 7);

        $ret .= <<___;
# see the comment in ke128enc
li $T6,$rcon[$rnum]
xor $T0,$T0,$T6
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T6)]}
xor $T1,$T1,$T0
xor $T2,$T2,$T1
xor $T3,$T3,$T2
___
        if (!$final) {
            $ret .= <<___;
xor $T4,$T4,$T3
xor $T5,$T5,$T4
___
        }
        $ret .= <<___;
add $KEYP,$KEYP,24
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___
        if (!$final) {
            $ret .= <<___;
sw $T4,16($KEYP)
sw $T5,20($KEYP)
___
        }
    }
    return $ret;
}
# Generates the AES-256 encryption key expansion: copies the eight user key
# words to the schedule, then emits seven rounds of 32 bytes each.  $zbkb
# selects a single rori for the rotate instead of the shift/shift/or sequence
# (which reuses the BITS register as scratch).
sub ke256enc {
    my ($zbkb) = @_;

    # right rotate by 8: T8 = T7 rotated; identical in every round
    my $rotate;
    if ($zbkb) {
        $rotate = rori($T8,$T7,8) . "\n";
    } else {
        $rotate = "srli $T8,$T7,8\n"
                . "slli $BITS,$T7,24\n"
                . "or $T8,$T8,$BITS\n";
    }

    my $ret = <<___;
lw $T0,0($UKEY)
lw $T1,4($UKEY)
lw $T2,8($UKEY)
lw $T3,12($UKEY)
lw $T4,16($UKEY)
lw $T5,20($UKEY)
lw $T6,24($UKEY)
lw $T7,28($UKEY)
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
sw $T4,16($KEYP)
sw $T5,20($KEYP)
sw $T6,24($KEYP)
sw $T7,28($KEYP)
___

    foreach my $rnum (0..6) {
        $ret .= <<___;
# see the comment in ke128enc
li $T8,$rcon[$rnum]
xor $T0,$T0,$T8
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T8)]}
xor $T1,$T1,$T0
xor $T2,$T2,$T1
xor $T3,$T3,$T2
add $KEYP,$KEYP,32
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___
        # note that (7+1)*32 = 256, (14+1)*16 = 240
        # thus the last 16 bytes (T4..T7 of the final round) can be dropped
        next if $rnum == 6;
        $ret .= <<___;
# for aes256, T3->T4 needs 4sbox but no rotate/rcon
@{[fwdsbox4($T4,$T3)]}
xor $T5,$T5,$T4
xor $T6,$T6,$T5
xor $T7,$T7,$T6
sw $T4,16($KEYP)
sw $T5,20($KEYP)
sw $T6,24($KEYP)
sw $T7,28($KEYP)
___
    }
    return $ret;
}
- ################################################################################
- # void rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
- # AES_KEY *key)
- ################################################################################
# Builds the body shared by all set_*_key routines.
#
# Arguments: three strings of already generated key-expansion assembly, for
# 128-, 192- and 256-bit keys in that order.
#
# Returns assembly that
#   - leaves -1 in a0 when either the user key pointer or the key pointer is
#     NULL,
#   - leaves -2 in a0 when the bit count is not 128, 192 or 256,
#   - otherwise stores the round count at offset 240 of the key structure,
#     runs the matching expansion and leaves 0 in a0.
# Every path ends at local label 5, the last line of the returned text, so
# the caller appends the register restore and "ret" after it.
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
    # The NULL test must fail when EITHER pointer is zero: a zero user key
    # pointer goes straight to the error path (label 6), and a non-zero one
    # still has to get past the key pointer test.  The error path leaves
    # through label 5 like the -2 path does, so it also runs the epilogue the
    # caller appends instead of returning with the frame still reserved.
    $ret .= <<___;
beqz $UKEY,6f # if (!userKey || !key) return -1;
bnez $KEYP,1f
6:
li a0,-1
j 5f
1:
# Determine number of rounds from key size in bits
li $T0,128
bne $BITS,$T0,1f
li $T1,10 # key->rounds = 10 if bits == 128
sw $T1,240($KEYP) # store key->rounds
$ke128
j 4f
1:
li $T0,192
bne $BITS,$T0,2f
li $T1,12 # key->rounds = 12 if bits == 192
sw $T1,240($KEYP) # store key->rounds
$ke192
j 4f
2:
li $T1,14 # key->rounds = 14 if bits == 256
li $T0,256
beq $BITS,$T0,3f
li a0,-2 # If bits != 128, 192, or 256, return -2
j 5f
3:
sw $T1,240($KEYP) # store key->rounds
$ke256
4: # return 0
li a0,0
5: # return a0
___
    return $ret;
}
# Header, register spill, shared body with the plain (non-Zbkb) expansions,
# register reload, return.
$code .= join('',
    <<___,
.text
.balign 16
.globl rv32i_zkne_set_encrypt_key
.type rv32i_zkne_set_encrypt_key,\@function
rv32i_zkne_set_encrypt_key:
___
    save_regs(),
    AES_set_common(ke128enc(0), ke192enc(0), ke256enc(0)),
    load_regs(),
    <<___);
ret
___
- ################################################################################
- # void rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
- # const int bits, AES_KEY *key)
- ################################################################################
# Header, register spill, shared body with the Zbkb (rori) expansions,
# register reload, return.
$code .= join('',
    <<___,
.text
.balign 16
.globl rv32i_zbkb_zkne_set_encrypt_key
.type rv32i_zbkb_zkne_set_encrypt_key,\@function
rv32i_zbkb_zkne_set_encrypt_key:
___
    save_regs(),
    AES_set_common(ke128enc(1), ke192enc(1), ke256enc(1)),
    load_regs(),
    <<___);
ret
___
- ################################################################################
- # utility functions for rv32i_zknd_zkne_set_decrypt_key
- ################################################################################
sub invm4 {
    # Arguments: (rd, tmp, rs).  Both rd and tmp are cleared first.
    # A forward sbox pass from rs into tmp is followed by an aes32dsmi pass
    # (inverse sbox, then mix column) from tmp into rd.  The two sbox steps
    # cancel, so only the mix column remains.
    # this simulates aes64im T0
    my ($rd, $tmp, $rs) = @_;
    return join('',
        "li $tmp,0\n",
        "li $rd,0\n",
        fwdsbox4($tmp,$rs), "\n",
        sbox4(\&aes32dsmi, $rd,$tmp), "\n");
}
# Generates the AES-128 decryption key expansion.  The words are derived as
# in ke128enc, but round keys 1..N-1 are passed through invm4 before being
# stored.  $zbkb selects rori for the rotate instead of shift/shift/or.
sub ke128dec {
    my ($zbkb) = @_;

    # right rotate by 8: T4 = T3 rotated; identical in every round
    my $rotate;
    if ($zbkb) {
        $rotate = rori($T4,$T3,8) . "\n";
    } else {
        $rotate = "srli $T4,$T3,8\n"
                . "slli $T5,$T3,24\n"
                . "or $T4,$T4,$T5\n";
    }

    # invm4 each word into T5 (T4 is scratch) and store it at consecutive
    # 4-byte offsets from KEYP, starting at 0
    my $mix_and_store = sub {
        my $text = '';
        my $offset = 0;
        foreach my $word (@_) {
            $text .= invm4($T5,$T4,$word) . "\n";
            $text .= "sw $T5,$offset($KEYP)\n";
            $offset += 4;
        }
        return $text;
    };

    my $ret = <<___;
lw $T0,0($UKEY)
lw $T1,4($UKEY)
lw $T2,8($UKEY)
lw $T3,12($UKEY)
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___

    foreach my $rnum (0..9) {
        $ret .= <<___;
# see comments in ke128enc
li $T4,$rcon[$rnum]
xor $T0,$T0,$T4
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T4)]}
xor $T1,$T1,$T0
xor $T2,$T2,$T1
xor $T3,$T3,$T2
add $KEYP,$KEYP,16
___
        # need to mixcolumn only for [1:N-1] round keys
        # this is from the fact that aes32dsmi subwords first then mix column
        # intuitively decryption needs to first mix column then subwords
        # however, for merging datapaths (encryption first subwords then mix
        # column) aes32dsmi chooses to inverse the order of them, thus
        # the transform should then be done on the round key
        if ($rnum < 9) {
            $ret .= "# T4 and T5 are temp variables\n";
            $ret .= $mix_and_store->($T0,$T1,$T2,$T3);
        } else {
            $ret .= <<___;
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___
        }
    }
    return $ret;
}
# Generates the AES-192 decryption key expansion.  The words are derived as
# in ke192enc, but every stored word except the first four and the last four
# is passed through invm4 first.  $zbkb selects rori for the rotate instead
# of shift/shift/or.
sub ke192dec {
    my ($zbkb) = @_;

    # right rotate by 8: T6 = T5 rotated; identical in every round
    my $rotate;
    if ($zbkb) {
        $rotate = rori($T6,$T5,8) . "\n";
    } else {
        $rotate = "srli $T6,$T5,8\n"
                . "slli $T7,$T5,24\n"
                . "or $T6,$T6,$T7\n";
    }

    # invm4 each word into T7 (T6 is scratch) and store it at consecutive
    # 4-byte offsets from KEYP, starting at $offset
    my $mix_and_store = sub {
        my ($offset, @words) = @_;
        my $text = '';
        foreach my $word (@words) {
            $text .= invm4($T7,$T6,$word) . "\n";
            $text .= "sw $T7,$offset($KEYP)\n";
            $offset += 4;
        }
        return $text;
    };

    my $ret = <<___;
lw $T0,0($UKEY)
lw $T1,4($UKEY)
lw $T2,8($UKEY)
lw $T3,12($UKEY)
lw $T4,16($UKEY)
lw $T5,20($UKEY)
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
# see the comment in ke128dec
# T7 and T6 are temp variables
___
    $ret .= $mix_and_store->(16, $T4,$T5);

    foreach my $rnum (0..7) {
        $ret .= <<___;
# see the comment in ke128enc
li $T6,$rcon[$rnum]
xor $T0,$T0,$T6
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T6)]}
xor $T1,$T1,$T0
xor $T2,$T2,$T1
xor $T3,$T3,$T2
add $KEYP,$KEYP,24
___
        if ($rnum < 7) {
            $ret .= <<___;
xor $T4,$T4,$T3
xor $T5,$T5,$T4
# see the comment in ke128dec
# T7 and T6 are temp variables
___
            $ret .= $mix_and_store->(0, $T0,$T1,$T2,$T3,$T4,$T5);
        } else { # rnum == 7
            $ret .= <<___;
# the reason for dropping T4/T5 is in ke192enc
# the reason for not invm4 is in ke128dec
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
___
        }
    }
    return $ret;
}
# Generates the AES-256 decryption key expansion.  The words are derived as
# in ke256enc, but every stored word except the first four and the last four
# is passed through invm4 first.  $zbkb selects rori for the rotate instead
# of shift/shift/or.
sub ke256dec {
    my ($zbkb) = @_;

    # right rotate by 8: T8 = T7 rotated; identical in every round
    my $rotate;
    if ($zbkb) {
        $rotate = rori($T8,$T7,8) . "\n";
    } else {
        $rotate = "srli $T8,$T7,8\n"
                . "slli $BITS,$T7,24\n"
                . "or $T8,$T8,$BITS\n";
    }

    # invm4 each word into T8 (BITS is scratch) and store it at consecutive
    # 4-byte offsets from KEYP, starting at $offset
    my $mix_and_store = sub {
        my ($offset, @words) = @_;
        my $text = '';
        foreach my $word (@words) {
            $text .= invm4($T8,$BITS,$word) . "\n";
            $text .= "sw $T8,$offset($KEYP)\n";
            $offset += 4;
        }
        return $text;
    };

    my $ret = <<___;
lw $T0,0($UKEY)
lw $T1,4($UKEY)
lw $T2,8($UKEY)
lw $T3,12($UKEY)
lw $T4,16($UKEY)
lw $T5,20($UKEY)
lw $T6,24($UKEY)
lw $T7,28($UKEY)
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
# see the comment in ke128dec
# BITS and T8 are temp variables
# BITS are not used anymore
___
    $ret .= $mix_and_store->(16, $T4,$T5,$T6,$T7);

    foreach my $rnum (0..6) {
        $ret .= <<___;
# see the comment in ke128enc
li $T8,$rcon[$rnum]
xor $T0,$T0,$T8
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T8)]}
xor $T1,$T1,$T0
xor $T2,$T2,$T1
xor $T3,$T3,$T2
add $KEYP,$KEYP,32
___
        if ($rnum < 6) {
            $ret .= <<___;
# for aes256, T3->T4 needs 4sbox but no rotate/rcon
@{[fwdsbox4($T4,$T3)]}
xor $T5,$T5,$T4
xor $T6,$T6,$T5
xor $T7,$T7,$T6
# see the comment in ke128dec
# T8 and BITS are temp variables
___
            $ret .= $mix_and_store->(0, $T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7);
        } else {
            $ret .= <<___;
sw $T0,0($KEYP)
sw $T1,4($KEYP)
sw $T2,8($KEYP)
sw $T3,12($KEYP)
# last 16 bytes are dropped
# see the comment in ke256enc
___
        }
    }
    return $ret;
}
- ################################################################################
- # void rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey, const int bits,
- # AES_KEY *key)
- ################################################################################
- # a note on naming: set_decrypt_key needs aes32esi thus add zkne on name
# Header, register spill, shared body with the plain (non-Zbkb) decryption
# expansions, register reload, return.
$code .= join('',
    <<___,
.text
.balign 16
.globl rv32i_zknd_zkne_set_decrypt_key
.type rv32i_zknd_zkne_set_decrypt_key,\@function
rv32i_zknd_zkne_set_decrypt_key:
___
    save_regs(),
    AES_set_common(ke128dec(0), ke192dec(0), ke256dec(0)),
    load_regs(),
    <<___);
ret
___
- ################################################################################
- # void rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
- # const int bits, AES_KEY *key)
- ################################################################################
# Header, register spill, shared body with the Zbkb (rori) decryption
# expansions, register reload, return.
$code .= join('',
    <<___,
.text
.balign 16
.globl rv32i_zbkb_zknd_zkne_set_decrypt_key
.type rv32i_zbkb_zknd_zkne_set_decrypt_key,\@function
rv32i_zbkb_zknd_zkne_set_decrypt_key:
___
    save_regs(),
    AES_set_common(ke128dec(1), ke192dec(1), ke256dec(1)),
    load_regs(),
    <<___);
ret
___

# Write out everything generated above; a failed close is fatal.
print $code;
close(STDOUT) or die "error closing STDOUT: $!";
|