Browse Source

MIPS32R3 provides the EXT instruction to extract bits from
registers. As the AES table is already 1K aligned we can
use it everywhere and speedup table address calculation by
10%. Performance numbers:

decryption 16B 64B 256B 1024B 8192B
-------------------------------------------------------------------
aes-256-cbc 5636.84k 6443.26k 6689.02k 6752.94k 6766.59k bef.
aes-256-cbc 6200.31k 7195.71k 7504.30k 7585.11k 7599.45k aft.
-------------------------------------------------------------------
aes-128-cbc 7313.85k 8653.67k 9079.55k 9188.35k 9205.08k bef.
aes-128-cbc 7925.38k 9557.99k 10092.37k 10232.15k 10272.77k aft.

encryption 16B 64B 256B 1024B 8192B
-------------------------------------------------------------------
aes-256 cbc 6009.65k 6592.70k 6766.59k 6806.87k 6815.74k bef.
aes-256 cbc 6643.93k 7388.69k 7605.33k 7657.81k 7675.90k aft.
-------------------------------------------------------------------
aes-128 cbc 7862.09k 8892.48k 9214.04k 9291.78k 9311.57k bef.
aes-128 cbc 8639.29k 9881.17k 10265.86k 10363.56k 10392.92k aft.

Reviewed-by: Paul Dale <paul.dale@oracle.com>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/8206)

Markus Stockhausen 5 years ago
parent
commit
4592172376
1 changed files with 85 additions and 49 deletions
  1. 85 49
      crypto/aes/asm/aes-mips.pl

+ 85 - 49
crypto/aes/asm/aes-mips.pl

@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the Apache License 2.0 (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -34,6 +34,11 @@
 # instead, code path is chosen upon pre-process time, pass -mips32r2
 # or/and -msmartmips.
 
+# February 2019
+#
+# Normalize MIPS32R2 AES table address calculation by always using EXT
+# instruction. This reduces the standard codebase by another 10%. 
+
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
 # widely used. Then there is a new contender: NUBI. It appears that if
@@ -223,6 +228,33 @@ _mips_AES_encrypt:
 	ext	$i0,$s1,16,8
 
 	_xtr	$i0,$s1,16-2
+#else
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	move	$i0,$Tbl
+	move	$i1,$Tbl
+	move	$i2,$Tbl
+	move	$i3,$Tbl
+	ext	$t0,$s1,16,8
+.Loop_enc:
+	ext	$t1,$s2,16,8
+	ext	$t2,$s3,16,8
+	ext	$t3,$s0,16,8
+	$PTR_INS $i0,$t0,2,8
+	$PTR_INS $i1,$t1,2,8
+	$PTR_INS $i2,$t2,2,8
+	$PTR_INS $i3,$t3,2,8
+	lw	$t0,0($i0)		# Te1[s1>>16]
+	ext	$t4,$s2,8,8
+	lw	$t1,0($i1)		# Te1[s2>>16]
+	ext	$t5,$s3,8,8
+	lw	$t2,0($i2)		# Te1[s3>>16]
+	ext	$t6,$s0,8,8
+	lw	$t3,0($i3)		# Te1[s0>>16]
+	ext	$t7,$s1,8,8
+	$PTR_INS $i0,$t4,2,8
+	$PTR_INS $i1,$t5,2,8
+	$PTR_INS $i2,$t6,2,8
+	$PTR_INS $i3,$t7,2,8
 #else
 	_xtr	$i0,$s1,16-2
 .Loop_enc:
@@ -237,16 +269,6 @@ _mips_AES_encrypt:
 	$PTR_ADD $i1,$Tbl
 	$PTR_ADD $i2,$Tbl
 	$PTR_ADD $i3,$Tbl
-#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
-	lw	$t0,0($i0)		# Te1[s1>>16]
-	_xtr	$i0,$s2,8-2
-	lw	$t1,0($i1)		# Te1[s2>>16]
-	_xtr	$i1,$s3,8-2
-	lw	$t2,0($i2)		# Te1[s3>>16]
-	_xtr	$i2,$s0,8-2
-	lw	$t3,0($i3)		# Te1[s0>>16]
-	_xtr	$i3,$s1,8-2
-#else
 	lwl	$t0,3($i0)		# Te1[s1>>16]
 	lwl	$t1,3($i1)		# Te1[s2>>16]
 	lwl	$t2,3($i2)		# Te1[s3>>16]
@@ -259,7 +281,6 @@ _mips_AES_encrypt:
 	_xtr	$i2,$s0,8-2
 	lwr	$t3,2($i3)		# Te1[s0>>16]
 	_xtr	$i3,$s1,8-2
-#endif
 	and	$i0,0x3fc
 	and	$i1,0x3fc
 	and	$i2,0x3fc
@@ -268,6 +289,7 @@ _mips_AES_encrypt:
 	$PTR_ADD $i1,$Tbl
 	$PTR_ADD $i2,$Tbl
 	$PTR_ADD $i3,$Tbl
+#endif
 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
 	rotr	$t0,$t0,8
 	rotr	$t1,$t1,8
@@ -275,22 +297,18 @@ _mips_AES_encrypt:
 	rotr	$t3,$t3,8
 # if defined(_MIPSEL)
 	lw	$t4,0($i0)		# Te2[s2>>8]
-	_xtr	$i0,$s3,0-2
+	ext	$t8,$s3,0,8
 	lw	$t5,0($i1)		# Te2[s3>>8]
-	_xtr	$i1,$s0,0-2
+	ext	$t9,$s0,0,8
 	lw	$t6,0($i2)		# Te2[s0>>8]
-	_xtr	$i2,$s1,0-2
+	ext	$t10,$s1,0,8
 	lw	$t7,0($i3)		# Te2[s1>>8]
-	_xtr	$i3,$s2,0-2
+	ext	$t11,$s2,0,8
+	$PTR_INS $i0,$t8,2,8
+	$PTR_INS $i1,$t9,2,8
+	$PTR_INS $i2,$t10,2,8
+	$PTR_INS $i3,$t11,2,8
 
-	and	$i0,0x3fc
-	and	$i1,0x3fc
-	and	$i2,0x3fc
-	and	$i3,0x3fc
-	$PTR_ADD $i0,$Tbl
-	$PTR_ADD $i1,$Tbl
-	$PTR_ADD $i2,$Tbl
-	$PTR_ADD $i3,$Tbl
 	lw	$t8,0($i0)		# Te3[s3]
 	$PTR_INS $i0,$s0,2,8
 	lw	$t9,0($i1)		# Te3[s0]
@@ -411,6 +429,9 @@ _mips_AES_encrypt:
 	xor	$s3,$t3
 	.set	noreorder
 	bnez	$cnt,.Loop_enc
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	ext	$t0,$s1,16,8
+#endif
 	_xtr	$i0,$s1,16-2
 #endif
 
@@ -811,6 +832,33 @@ _mips_AES_decrypt:
 	ext	$i0,$s3,16,8
 
 	_xtr	$i0,$s3,16-2
+#else
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	move	$i0,$Tbl
+	move	$i1,$Tbl
+	move	$i2,$Tbl
+	move	$i3,$Tbl
+	ext	$t0,$s3,16,8
+.Loop_dec:
+	ext	$t1,$s0,16,8
+	ext	$t2,$s1,16,8
+	ext	$t3,$s2,16,8
+	$PTR_INS $i0,$t0,2,8
+	$PTR_INS $i1,$t1,2,8
+	$PTR_INS $i2,$t2,2,8
+	$PTR_INS $i3,$t3,2,8
+	lw	$t0,0($i0)		# Td1[s3>>16]
+	ext	$t4,$s2,8,8
+	lw	$t1,0($i1)		# Td1[s0>>16]
+	ext	$t5,$s3,8,8
+	lw	$t2,0($i2)		# Td1[s1>>16]
+	ext	$t6,$s0,8,8
+	lw	$t3,0($i3)		# Td1[s2>>16]
+	ext	$t7,$s1,8,8
+	$PTR_INS $i0,$t4,2,8
+	$PTR_INS $i1,$t5,2,8
+	$PTR_INS $i2,$t6,2,8
+	$PTR_INS $i3,$t7,2,8
 #else
 	_xtr	$i0,$s3,16-2
 .Loop_dec:
@@ -825,16 +873,6 @@ _mips_AES_decrypt:
 	$PTR_ADD $i1,$Tbl
 	$PTR_ADD $i2,$Tbl
 	$PTR_ADD $i3,$Tbl
-#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
-	lw	$t0,0($i0)		# Td1[s3>>16]
-	_xtr	$i0,$s2,8-2
-	lw	$t1,0($i1)		# Td1[s0>>16]
-	_xtr	$i1,$s3,8-2
-	lw	$t2,0($i2)		# Td1[s1>>16]
-	_xtr	$i2,$s0,8-2
-	lw	$t3,0($i3)		# Td1[s2>>16]
-	_xtr	$i3,$s1,8-2
-#else
 	lwl	$t0,3($i0)		# Td1[s3>>16]
 	lwl	$t1,3($i1)		# Td1[s0>>16]
 	lwl	$t2,3($i2)		# Td1[s1>>16]
@@ -847,8 +885,6 @@ _mips_AES_decrypt:
 	_xtr	$i2,$s0,8-2
 	lwr	$t3,2($i3)		# Td1[s2>>16]
 	_xtr	$i3,$s1,8-2
-#endif
-
 	and	$i0,0x3fc
 	and	$i1,0x3fc
 	and	$i2,0x3fc
@@ -857,6 +893,7 @@ _mips_AES_decrypt:
 	$PTR_ADD $i1,$Tbl
 	$PTR_ADD $i2,$Tbl
 	$PTR_ADD $i3,$Tbl
+#endif
 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
 	rotr	$t0,$t0,8
 	rotr	$t1,$t1,8
@@ -864,22 +901,17 @@ _mips_AES_decrypt:
 	rotr	$t3,$t3,8
 # if defined(_MIPSEL)
 	lw	$t4,0($i0)		# Td2[s2>>8]
-	_xtr	$i0,$s1,0-2
+	ext	$t8,$s1,0,8
 	lw	$t5,0($i1)		# Td2[s3>>8]
-	_xtr	$i1,$s2,0-2
+	ext	$t9,$s2,0,8
 	lw	$t6,0($i2)		# Td2[s0>>8]
-	_xtr	$i2,$s3,0-2
+	ext	$t10,$s3,0,8
 	lw	$t7,0($i3)		# Td2[s1>>8]
-	_xtr	$i3,$s0,0-2
-
-	and	$i0,0x3fc
-	and	$i1,0x3fc
-	and	$i2,0x3fc
-	and	$i3,0x3fc
-	$PTR_ADD $i0,$Tbl
-	$PTR_ADD $i1,$Tbl
-	$PTR_ADD $i2,$Tbl
-	$PTR_ADD $i3,$Tbl
+	ext	$t11,$s0,0,8
+	$PTR_INS $i0,$t8,2,8
+	$PTR_INS $i1,$t9,2,8
+	$PTR_INS $i2,$t10,2,8
+	$PTR_INS $i3,$t11,2,8
 	lw	$t8,0($i0)		# Td3[s1]
 	$PTR_INS $i0,$s0,2,8
 	lw	$t9,0($i1)		# Td3[s2]
@@ -1001,6 +1033,10 @@ _mips_AES_decrypt:
 	xor	$s3,$t3
 	.set	noreorder
 	bnez	$cnt,.Loop_dec
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	ext	$t0,$s3,16,8
+#endif
+
 	_xtr	$i0,$s3,16-2
 #endif