Browse Source

linuxkm: override-disable SIMD instructions for all .c.o's, with exceptions enumerated in Kbuild (currently only aes.c), and couple -msse with -fno-builtin; export ENABLED_ASM for use as a pivot in Kbuild; use asm/i387.h, not asm/simd.h, for kernel_fpu_{begin,end}() protos.

Daniel Pouzzner 3 years ago
parent
commit
cd14cfb092
5 changed files with 57 additions and 14 deletions
  1. 1 1
      Makefile.am
  2. 1 0
      configure.ac
  3. 32 5
      linuxkm/Kbuild
  4. 16 7
      wolfcrypt/src/aes.c
  5. 7 1
      wolfssl/wolfcrypt/wc_port.h

+ 1 - 1
Makefile.am

@@ -198,7 +198,7 @@ endif
 include scripts/include.am
 
 if BUILD_LINUXKM
-export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS
+export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS ENABLED_ASM
 SUBDIRS = linuxkm
 endif
 

+ 1 - 0
configure.ac

@@ -279,6 +279,7 @@ if test "$ENABLED_ASM" = "no"
 then
     AM_CFLAGS="$AM_CFLAGS -DTFM_NO_ASM -DWOLFSSL_NO_ASM"
 fi
+AC_SUBST([ENABLED_ASM])
 
 
 # SINGLE THREADED

+ 32 - 5
linuxkm/Kbuild

@@ -10,6 +10,8 @@ ifeq "$(WOLFSSL_CFLAGS)" ""
 $(error $$WOLFSSL_CFLAGS is unset.)
 endif
 
+WOLFSSL_CFLAGS += -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
+
 obj-m := libwolfssl.o
 
 WOLFSSL_OBJ_TARGETS=$(patsubst %, $(obj)/%, $(WOLFSSL_OBJ_FILES))
@@ -30,17 +32,42 @@ MAX_STACK_FRAME_SIZE=$(shell echo $$(( $(KERNEL_THREAD_STACK_SIZE) / 4)))
 
 libwolfssl-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/module_exports.o
 
-ccflags-y = $(WOLFSSL_CFLAGS) -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
+ifeq "$(KERNEL_ARCH)" "x86"
+    WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= -mno-sse
+    ifeq "$(ENABLED_ASM)" "yes"
+        # x86 kernel disables fp and vector insns and register usage with
+        # "-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-80387 -mno-fp-ret-in-387".
+        # re-enable the minimum subset of vector ops needed for compilation,
+        # while explicitly disabling auto-vectorization, and leave fp disabled.
+        # note that including -mavx here is known to introduce unaccommodated
+        # simd register ops, e.g. in integer.c:mp_exch().
+        WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= -msse -mmmx -fno-builtin -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
+    else
+        WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
+    endif
+else ifeq "$(KERNEL_ARCH)" "arm64"
+    WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
+    WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
+#        WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $AM_CFLAGS -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
+else ifeq "$(KERNEL_ARCH)" "arm"
+    WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
+    WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
+#        WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $AM_CFLAGS -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
+endif
+
+ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_NO_VECTOR_INSNS)
+
+$(obj)/libwolfssl.mod.o: ccflags-y :=
+$(obj)/wolfcrypt/test/test.o: ccflags-y += -DNO_MAIN_DRIVER
 
-%/libwolfssl.mod.o: ccflags-y :=
-%/test.o: ccflags-y += -DNO_MAIN_DRIVER
+$(obj)/wolfcrypt/src/aes.o: ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_YES_VECTOR_INSNS)
 
 asflags-y := $(WOLFSSL_ASFLAGS)
 
 # these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_)
 # but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register"
-%/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
-%/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
+$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
+$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
 
 # auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags.
 # exclude symbols that don't match wc_* or wolf*.

+ 16 - 7
wolfcrypt/src/aes.c

@@ -777,6 +777,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
             nr = temp_key.rounds;
             aes->rounds = nr;
 
+            SAVE_VECTOR_REGISTERS();
+
             Key_Schedule[nr] = Temp_Key_Schedule[0];
             Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
             Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
@@ -800,6 +802,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
 
             Key_Schedule[0] = Temp_Key_Schedule[nr];
 
+            RESTORE_VECTOR_REGISTERS();
+
             return 0;
         }
     #endif /* HAVE_AES_DECRYPT */
@@ -1696,12 +1700,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 
             tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
 
-            XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
             SAVE_VECTOR_REGISTERS();
+            XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
             AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
                     (byte*)aes->key, aes->rounds);
-            RESTORE_VECTOR_REGISTERS();
             XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
+            RESTORE_VECTOR_REGISTERS();
             XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return;
         #else
@@ -1995,9 +1999,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         #endif
 
         /* if input and output are the same buffer, the input iv will be overwritten */
+        SAVE_VECTOR_REGISTERS();
         if ((const byte*)aes->tmp != inBlock)
             XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
-        SAVE_VECTOR_REGISTERS();
         AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
                         aes->rounds);
         RESTORE_VECTOR_REGISTERS();
@@ -7268,15 +7272,17 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
 #ifdef WOLFSSL_AESNI
     if (haveAESNI && aes->use_aesni) {
         while (inSz >= AES_BLOCK_SIZE * 4) {
+            SAVE_VECTOR_REGISTERS();
+
             AesCcmCtrIncSet4(B, lenSz);
 
-            SAVE_VECTOR_REGISTERS();
             AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
                             aes->rounds);
-            RESTORE_VECTOR_REGISTERS();
             xorbuf(A, in, AES_BLOCK_SIZE * 4);
             XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
 
+            RESTORE_VECTOR_REGISTERS();
+
             inSz -= AES_BLOCK_SIZE * 4;
             in += AES_BLOCK_SIZE * 4;
             out += AES_BLOCK_SIZE * 4;
@@ -7352,15 +7358,18 @@ int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
 #ifdef WOLFSSL_AESNI
     if (haveAESNI && aes->use_aesni) {
         while (oSz >= AES_BLOCK_SIZE * 4) {
+            SAVE_VECTOR_REGISTERS();
+
             AesCcmCtrIncSet4(B, lenSz);
 
-            SAVE_VECTOR_REGISTERS();
             AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
                             aes->rounds);
-            RESTORE_VECTOR_REGISTERS();
+
             xorbuf(A, in, AES_BLOCK_SIZE * 4);
             XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
 
+            RESTORE_VECTOR_REGISTERS();
+
             oSz -= AES_BLOCK_SIZE * 4;
             in += AES_BLOCK_SIZE * 4;
             o += AES_BLOCK_SIZE * 4;

+ 7 - 1
wolfssl/wolfcrypt/wc_port.h

@@ -88,7 +88,13 @@
     #endif
     #include <linux/net.h>
     #include <linux/slab.h>
-    #include <asm/simd.h>
+    #if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
+        #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+            #include <asm/i387.h>
+        #else
+            #include <asm/simd.h>
+        #endif
+    #endif
     _Pragma("GCC diagnostic pop");
 
     /* remove this multifariously conflicting macro, picked up from