#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___
################################################################################
# void gcm_init_rv64i_zbc(u128 Htable[16], const u64 H[2]);
# void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 H[2]);
# void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 H[2]);
#
# input:  H: 128-bit H - secret parameter E(K, 0^128)
# output: Htable: Preprocessed key data for gcm_gmult_rv64i_zbc* and
#                 gcm_ghash_rv64i_zbc*
#
# All callers of this function reverse the byte order unconditionally
# on little-endian machines, so we need to reverse the byte order back.
# Additionally we reverse the bits of each byte.
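#
# The plain Zbc variant below emulates the bit and byte reversal with base-ISA
# instruction sequences (the brev8_rv64i and sd_rev8_rv64i helpers provided by
# the riscv perlasm module), the __zbb variant uses the Zbb rev8 instruction,
# and the __zbkb variant uses the Zbkb brev8 and rev8 instructions.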
{
my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");

$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc
.type gcm_init_rv64i_zbc,\@function
gcm_init_rv64i_zbc:
    ld $VAL0,0($H)
    ld $VAL1,8($H)
    @{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
    @{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
    @{[sd_rev8_rv64i $VAL0, $Htable, 0, $TMP0]}
    @{[sd_rev8_rv64i $VAL1, $Htable, 8, $TMP0]}
    ret
.size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc
___
}

{
my ($Htable,$H,$VAL0,$VAL1,$TMP0,$TMP1,$TMP2) = ("a0","a1","a2","a3","t0","t1","t2");

$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc__zbb
.type gcm_init_rv64i_zbc__zbb,\@function
gcm_init_rv64i_zbc__zbb:
    ld $VAL0,0($H)
    ld $VAL1,8($H)
    @{[brev8_rv64i $VAL0, $TMP0, $TMP1, $TMP2]}
    @{[brev8_rv64i $VAL1, $TMP0, $TMP1, $TMP2]}
    @{[rev8 $VAL0, $VAL0]}
    @{[rev8 $VAL1, $VAL1]}
    sd $VAL0,0($Htable)
    sd $VAL1,8($Htable)
    ret
.size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb
___
}

{
my ($Htable,$H,$TMP0,$TMP1) = ("a0","a1","t0","t1");

$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc__zbkb
.type gcm_init_rv64i_zbc__zbkb,\@function
gcm_init_rv64i_zbc__zbkb:
    ld $TMP0,0($H)
    ld $TMP1,8($H)
    @{[brev8 $TMP0, $TMP0]}
    @{[brev8 $TMP1, $TMP1]}
    @{[rev8 $TMP0, $TMP0]}
    @{[rev8 $TMP1, $TMP1]}
    sd $TMP0,0($Htable)
    sd $TMP1,8($Htable)
    ret
.size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb
___
}
################################################################################
# void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
# void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
#
# input:  Xi: current hash value
#         Htable: copy of H
# output: Xi: next hash value Xi
#
# Compute GMULT (Xi*H mod f) using the Zbc (clmul) and Zbb (basic bit manip)
# extensions. We use the no-Karatsuba approach and clmul for the final
# reduction, which minimizes the number of instructions.
# HW with clmul latencies higher than 2 cycles might observe a performance
# improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
# might observe a performance improvement with additionally converting the
# reduction to shift&xor. For a full discussion of these estimates see
# https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
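#
# The 256-bit carry-less product is collected in z3:z2:z1:z0 (four 64-bit
# words). Xi and H are kept bit-reversed so that clmul/clmulh operate on the
# polynomials in natural bit order; the reduction then uses
# x^128 == x^7 + x^2 + x + 1 (mod f), i.e. the upper two product words are
# folded into the lower half by carry-less multiplication with the single
# byte 0x87 stored at Lpolymod.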
{
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_gmult_rv64i_zbc
.type gcm_gmult_rv64i_zbc,\@function
gcm_gmult_rv64i_zbc:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)
    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)
    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1
    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0
    # Bit-reverse Xi back and store it
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc
___
}
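################################################################################
# gcm_gmult_rv64i_zbc__zbkb is identical to gcm_gmult_rv64i_zbc above except
# that Xi is bit-reversed with the single Zbkb brev8 instruction instead of
# the emulated brev8_rv64i sequence.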
{
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_gmult_rv64i_zbc__zbkb
.type gcm_gmult_rv64i_zbc__zbkb,\@function
gcm_gmult_rv64i_zbc__zbkb:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)
    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)
    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1
    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0
    # Bit-reverse Xi back and store it
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb
___
}
################################################################################
# void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
#                          const u8 *inp, size_t len);
# void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
#                                const u8 *inp, size_t len);
#
# input:  Xi: current hash value
#         Htable: copy of H
#         inp: pointer to input data
#         len: length of input data in bytes (multiple of block size)
# output: Xi: Xi+1 (next hash value Xi)
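#
# Each iteration of the block loop consumes one 16-byte block:
# Xi = (Xi ^ block) * H mod f. The multiply/reduce sequence is the same as in
# gcm_gmult_rv64i_zbc; only the per-block load and XOR are added, and Xi stays
# bit-reversed in registers across iterations.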
{
my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_ghash_rv64i_zbc
.type gcm_ghash_rv64i_zbc,\@function
gcm_ghash_rv64i_zbc:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)
    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)
Lstep:
    # Load the input data, bit-reverse them, and XOR them with Xi
    ld $t0, 0($inp)
    ld $t1, 8($inp)
    add $inp, $inp, 16
    add $len, $len, -16
    @{[brev8_rv64i $t0, $z0, $z1, $z2]}
    @{[brev8_rv64i $t1, $z0, $z1, $z2]}
    xor $x0, $x0, $t0
    xor $x1, $x1, $t1
    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1
    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0
    # Iterate over all blocks
    bnez $len, Lstep
    # Bit-reverse final Xi back and store it
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc
___
}
{
my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_ghash_rv64i_zbc__zbkb
.type gcm_ghash_rv64i_zbc__zbkb,\@function
gcm_ghash_rv64i_zbc__zbkb:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)
    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)
Lstep_zbkb:
    # Load the input data, bit-reverse them, and XOR them with Xi
    ld $t0, 0($inp)
    ld $t1, 8($inp)
    add $inp, $inp, 16
    add $len, $len, -16
    @{[brev8 $t0, $t0]}
    @{[brev8 $t1, $t1]}
    xor $x0, $x0, $t0
    xor $x1, $x1, $t1
    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1
    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0
    # Iterate over all blocks
    bnez $len, Lstep_zbkb
    # Bit-reverse final Xi back and store it
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb
___
}
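################################################################################
# Constants
#
# Lbrev8_const holds the three masks used by the brev8_rv64i helper to swap
# adjacent bits, bit pairs, and nibbles, which together reverse the bits
# within each byte. Lpolymod holds 0x87, the low byte of the GHASH reduction
# polynomial x^128 + x^7 + x^2 + x + 1.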
$code .= <<___;
.p2align 3
Lbrev8_const:
    .dword 0xAAAAAAAAAAAAAAAA
    .dword 0xCCCCCCCCCCCCCCCC
    .dword 0xF0F0F0F0F0F0F0F0
.size Lbrev8_const,.-Lbrev8_const
Lpolymod:
    .byte 0x87
.size Lpolymod,.-Lpolymod
___

print $code;

close STDOUT or die "error closing STDOUT: $!";