aes-riscv32-zkn.pl

#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2022, Hongren (Zenithal) Zheng <i@zenithal.me>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

################################################################################
# Utility functions to help with keeping track of which registers to stack/
# unstack when entering / exiting routines.
################################################################################
{
# Callee-saved registers
my @callee_saved = map("x$_",(2,8,9,18..27));
# Caller-saved registers
my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
my @must_save;
sub use_reg {
    my $reg = shift;
    if (grep(/^$reg$/, @callee_saved)) {
        push(@must_save, $reg);
    } elsif (!grep(/^$reg$/, @caller_saved)) {
        # Register is not usable!
        die("Unusable register ".$reg);
    }
    return $reg;
}

sub use_regs {
    return map(use_reg("x$_"), @_);
}

sub save_regs {
    my $ret = '';
    my $stack_reservation = ($#must_save + 1) * 8;
    my $stack_offset = $stack_reservation;
    if ($stack_reservation % 16) {
        $stack_reservation += 8;
    }
    $ret.="    addi    sp,sp,-$stack_reservation\n";
    foreach (@must_save) {
        $stack_offset -= 8;
        $ret.="    sw      $_,$stack_offset(sp)\n";
    }
    return $ret;
}

sub load_regs {
    my $ret = '';
    my $stack_reservation = ($#must_save + 1) * 8;
    my $stack_offset = $stack_reservation;
    if ($stack_reservation % 16) {
        $stack_reservation += 8;
    }
    foreach (@must_save) {
        $stack_offset -= 8;
        $ret.="    lw      $_,$stack_offset(sp)\n";
    }
    $ret.="    addi    sp,sp,$stack_reservation\n";
    return $ret;
}

sub clear_regs {
    @must_save = ();
}
}
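
# Note: each saved register gets an 8-byte stack slot and the total reservation
# is rounded up to a multiple of 16, which keeps sp 16-byte aligned as the
# RISC-V psABI requires; the saves/restores themselves use sw/lw since this is
# RV32 code.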

################################################################################
# Utilities for encoding scalar crypto extension instructions
################################################################################
my @regs = map("x$_",(0..31));
my %reglookup;
@reglookup{@regs} = @regs;

# Takes a register name of the form xN and converts it to a register index
# from 0 to 31
sub read_reg {
    my $reg = lc shift;
    if (!exists($reglookup{$reg})) {
        die("Unknown register ".$reg);
    }
    my $regstr = $reglookup{$reg};
    if (!($regstr =~ /^x([0-9]+)$/)) {
        die("Could not process register ".$reg);
    }
    return $1;
}

sub aes32dsi {
    # Encoding for aes32dsi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10101_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32dsmi {
    # Encoding for aes32dsmi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10111_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32esi {
    # Encoding for aes32esi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10001_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32esmi {
    # Encoding for aes32esmi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10011_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub rori {
    # Encoding for rori rd, rs1, shamt instruction on RV32
    # XXXXXXX_shamt_ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $shamt = shift;
    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}
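
# For reference (per the Zkn scalar-crypto spec): aes32esi/aes32esmi select
# byte bs of rs2, apply the forward AES sbox (aes32esmi also applies the
# forward MixColumns contribution), rotate the result left by 8*bs, XOR it
# with rs1 and write rd; aes32dsi/aes32dsmi are the inverse-sbox counterparts.
# As a hand-worked cross-check (illustrative only, not quoted from the spec),
# the first word produced by "fwdsbox4 $T0,$T4" in ke128enc below,
# aes32esi("x13","x13","x17",0), should encode to 0x231686b3, i.e.
# aes32esi a3,a3,a7,0 with bs=0 (the script prints the value in decimal).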

################################################################################
# Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# Registers initially to hold AES state (called s0-s3 or y0-y3 elsewhere)
my ($Q0,$Q1,$Q2,$Q3) = use_regs(6..9);

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = use_regs(10..12);

# Registers initially to hold key
my ($T0,$T1,$T2,$T3) = use_regs(13..16);

# Loop counter
my ($loopcntr) = use_regs(30);

################################################################################
# Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################
# outer product of whole state into one column of key
sub outer {
    my $inst = shift;
    my $key = shift;
    # state 0 to 3
    my $s0 = shift;
    my $s1 = shift;
    my $s2 = shift;
    my $s3 = shift;
    my $ret = '';
    $ret .= <<___;
    @{[$inst->($key,$key,$s0,0)]}
    @{[$inst->($key,$key,$s1,1)]}
    @{[$inst->($key,$key,$s2,2)]}
    @{[$inst->($key,$key,$s3,3)]}
___
    return $ret;
}

sub aes32esmi4 {
    return outer(\&aes32esmi, @_)
}

sub aes32esi4 {
    return outer(\&aes32esi, @_)
}

sub aes32dsmi4 {
    return outer(\&aes32dsmi, @_)
}

sub aes32dsi4 {
    return outer(\&aes32dsi, @_)
}
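
# Each aes32*4 helper issues the same instruction with bs = 0..3, so the
# destination accumulates key ^ g(byte0 of s0) ^ g(byte1 of s1) ^
# g(byte2 of s2) ^ g(byte3 of s3), where g is the (inverse) sbox plus the
# (inverse) MixColumns contribution for the *mi forms.  Passing the state
# words in rotated order implements ShiftRows, so one call produces one
# complete column of the next state with AddRoundKey already folded in.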

################################################################################
# void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
#   const AES_KEY *key);
################################################################################
my $code .= <<___;
.text
.balign 16
.globl rv32i_zkne_encrypt
.type   rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds; each iteration
    # consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)
    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)
    @{[aes32esmi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esmi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esmi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esmi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

    # final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)
    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)
    # no MixColumns in the last round
    @{[aes32esi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
#   const AES_KEY *key);
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_decrypt
.type   rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # Load the last round key
    # use T0 as temporary now
    slli    $T0,$loopcntr,4
    add     $KEYP,$KEYP,$T0
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds; each iteration
    # consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)
    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)
    @{[aes32dsmi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsmi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsmi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsmi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

    # final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)
    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)
    # no InvMixColumns in the last round
    @{[aes32dsi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

clear_regs();

################################################################################
# Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
################################################################################

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Pointer to user key, number of bits in key, key pointer
my ($UKEY,$BITS,$KEYP) = use_regs(10..12);

# Temporaries
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = use_regs(13..17,28..31);

################################################################################
# Utility functions for rv32i_zkne_set_encrypt_key
################################################################################
my @rcon = (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36);

# Apply the sbox to each of the 4 bytes of rs (possibly with MixColumns),
# then xor the result into rd
sub sbox4 {
    my $inst = shift;
    my $rd = shift;
    my $rs = shift;
    my $ret = <<___;
    @{[$inst->($rd,$rd,$rs,0)]}
    @{[$inst->($rd,$rd,$rs,1)]}
    @{[$inst->($rd,$rd,$rs,2)]}
    @{[$inst->($rd,$rd,$rs,3)]}
___
    return $ret;
}

sub fwdsbox4 {
    return sbox4(\&aes32esi, @_);
}
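
# Net effect: fwdsbox4(rd, rs) leaves rd = rd ^ SubWord(rs) in FIPS-197 terms,
# which is why callers pre-load rd with whatever SubWord(rs) should be
# combined with (e.g. rcon ^ the previous round-key word below).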

sub ke128enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    while($rnum < 10) {
        $ret .= <<___;
    # use T4 to store rcon
    li      $T4,$rcon[$rnum]
    # as xor is associative and commutative,
    # we first xor T0 with rcon, then xor into T0
    # the sbox output of each byte of (rotated) T3
    xor     $T0,$T0,$T4
    # use T4 to store rotated T3
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
            $ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
        $ret .= <<___;
    # update T0
    @{[fwdsbox4 $T0,$T4]}
    # update new T1~T3
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        $rnum++;
    }
    return $ret;
}
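
# For reference, each iteration above implements the AES-128 key schedule
# recurrence from FIPS-197:
#   w[4i]   = w[4i-4] ^ SubWord(RotWord(w[4i-1])) ^ rcon[i]
#   w[4i+j] = w[4i+j-4] ^ w[4i+j-1]                  (j = 1..3)
# with T0..T3 holding the current four words, rcon folded into T0 first, and
# the byte rotation above standing in for RotWord.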

sub ke192enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
    while($rnum < 8) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
            $ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
___
        if ($rnum != 7) {
            # note that (8+1)*24 = 216, (12+1)*16 = 208
            # thus the last 8 bytes can be dropped
            $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
___
        }
        $ret .= <<___;
    add     $KEYP,$KEYP,24
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($rnum != 7) {
            $ret .= <<___;
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

sub ke256enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
    while($rnum < 7) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
            $ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,32
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($rnum != 6) {
            # note that (7+1)*32 = 256, (14+1)*16 = 240
            # thus the last 16 bytes can be dropped
            $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}
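
# Note: for AES-256 (Nk = 8) FIPS-197 additionally applies SubWord, without
# RotWord or rcon, when i mod 8 == 4; that is the "fwdsbox4 T4,T3" step above,
# and only 7 rcon values are consumed in total.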

################################################################################
# void rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
#   AES_KEY *key)
################################################################################
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
    $ret .= <<___;
    bnez    $UKEY,1f        # if (!userKey || !key) return -1;
    bnez    $KEYP,1f
    li      a0,-1
    ret
1:
    # Determine number of rounds from key size in bits
    li      $T0,128
    bne     $BITS,$T0,1f
    li      $T1,10          # key->rounds = 10 if bits == 128
    sw      $T1,240($KEYP)  # store key->rounds
$ke128
    j       4f
1:
    li      $T0,192
    bne     $BITS,$T0,2f
    li      $T1,12          # key->rounds = 12 if bits == 192
    sw      $T1,240($KEYP)  # store key->rounds
$ke192
    j       4f
2:
    li      $T1,14          # key->rounds = 14 if bits == 256
    li      $T0,256
    beq     $BITS,$T0,3f
    li      a0,-2           # If bits != 128, 192, or 256, return -2
    j       5f
3:
    sw      $T1,240($KEYP)  # store key->rounds
$ke256
4:  # return 0
    li      a0,0
5:  # return a0
___
    return $ret;
}

$code .= <<___;
.text
.balign 16
.globl rv32i_zkne_set_encrypt_key
.type   rv32i_zkne_set_encrypt_key,\@function
rv32i_zkne_set_encrypt_key:
___
$code .= save_regs();
$code .= AES_set_common(ke128enc(0), ke192enc(0), ke256enc(0));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# void rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
#   const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zkne_set_encrypt_key
.type   rv32i_zbkb_zkne_set_encrypt_key,\@function
rv32i_zbkb_zkne_set_encrypt_key:
___
$code .= save_regs();
$code .= AES_set_common(ke128enc(1), ke192enc(1), ke256enc(1));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# Utility functions for rv32i_zknd_zkne_set_decrypt_key
################################################################################
sub invm4 {
    # Forward sbox, then inverse sbox + inverse MixColumns:
    # the sboxes cancel, so the net result is InvMixColumns only.
    # This emulates the RV64 aes64im instruction.
    my $rd = shift;
    my $tmp = shift;
    my $rs = shift;
    my $ret = <<___;
    li      $tmp,0
    li      $rd,0
    @{[fwdsbox4 $tmp,$rs]}
    @{[sbox4(\&aes32dsmi, $rd,$tmp)]}
___
    return $ret;
}
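
# Why this is needed: with the equivalent inverse cipher construction
# (FIPS-197 section 5.3.5), every decryption round key except the first and
# the last must be passed through InvMixColumns, matching aes32dsmi's
# InvSubBytes-then-InvMixColumns order.  rd and tmp are zeroed first because
# the aes32* instructions XOR into their destination rather than overwrite it.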

sub ke128dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    while($rnum < 10) {
        $ret .= <<___;
    # see comments in ke128enc
    li      $T4,$rcon[$rnum]
    xor     $T0,$T0,$T4
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
            $ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T4]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
___
        # InvMixColumns is needed only for round keys [1:N-1].
        # This stems from the fact that aes32dsmi applies the inverse sbox
        # first and InvMixColumns second, whereas decryption conceptually
        # needs InvMixColumns before the inverse sbox; to share a datapath
        # with encryption (sbox first, then MixColumns) aes32dsmi reverses
        # that order, so the compensating transform has to be applied to the
        # round keys instead.
        if ($rnum < 9) {
            $ret .= <<___;
    # T4 and T5 are temp variables
    @{[invm4 $T5,$T4,$T0]}
    sw      $T5,0($KEYP)
    @{[invm4 $T5,$T4,$T1]}
    sw      $T5,4($KEYP)
    @{[invm4 $T5,$T4,$T2]}
    sw      $T5,8($KEYP)
    @{[invm4 $T5,$T4,$T3]}
    sw      $T5,12($KEYP)
___
        } else {
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

sub ke192dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T4]}
    sw      $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw      $T7,20($KEYP)
___
    while($rnum < 8) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
            $ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,24
___
        if ($rnum < 7) {
            $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T0]}
    sw      $T7,0($KEYP)
    @{[invm4 $T7,$T6,$T1]}
    sw      $T7,4($KEYP)
    @{[invm4 $T7,$T6,$T2]}
    sw      $T7,8($KEYP)
    @{[invm4 $T7,$T6,$T3]}
    sw      $T7,12($KEYP)
    @{[invm4 $T7,$T6,$T4]}
    sw      $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw      $T7,20($KEYP)
___
        } else { # rnum == 7
            $ret .= <<___;
    # the reason for dropping T4/T5 is explained in ke192enc;
    # the reason for skipping invm4 here is explained in ke128dec
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

sub ke256dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # T8 and BITS are temp variables
    # (BITS is not needed anymore at this point)
    @{[invm4 $T8,$BITS,$T4]}
    sw      $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw      $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw      $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw      $T8,28($KEYP)
___
    while($rnum < 7) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
            $ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,32
___
        if ($rnum < 6) {
            $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    # see the comment in ke128dec
    # T8 and BITS are temp variables
    @{[invm4 $T8,$BITS,$T0]}
    sw      $T8,0($KEYP)
    @{[invm4 $T8,$BITS,$T1]}
    sw      $T8,4($KEYP)
    @{[invm4 $T8,$BITS,$T2]}
    sw      $T8,8($KEYP)
    @{[invm4 $T8,$BITS,$T3]}
    sw      $T8,12($KEYP)
    @{[invm4 $T8,$BITS,$T4]}
    sw      $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw      $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw      $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw      $T8,28($KEYP)
___
        } else {
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # the last 16 bytes are dropped
    # see the comment in ke256enc
___
        }
        $rnum++;
    }
    return $ret;
}

################################################################################
# void rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#   const int bits, AES_KEY *key)
################################################################################
# A note on naming: set_decrypt_key also needs aes32esi (from Zkne) for the
# forward key expansion, hence the zkne in the function name.
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_zkne_set_decrypt_key
.type   rv32i_zknd_zkne_set_decrypt_key,\@function
rv32i_zknd_zkne_set_decrypt_key:
___
$code .= save_regs();
$code .= AES_set_common(ke128dec(0), ke192dec(0), ke256dec(0));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# void rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#   const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zknd_zkne_set_decrypt_key
.type   rv32i_zbkb_zknd_zkne_set_decrypt_key,\@function
rv32i_zbkb_zknd_zkne_set_decrypt_key:
___
$code .= save_regs();
$code .= AES_set_common(ke128dec(1), ke192dec(1), ke256dec(1));
$code .= load_regs();
$code .= <<___;
    ret
___

print $code;

close STDOUT or die "error closing STDOUT: $!";