Browse Source

Optimise some common instructions (lea nop; cmp x, 0; test x, x; xor x, x)

Fabian 3 years ago
parent
commit
530aaba1ea

+ 46 - 13
src/rust/jit_instructions.rs

@@ -29,6 +29,18 @@ impl<'a> LocalOrImmediate<'a> {
             LocalOrImmediate::Immediate(i) => builder.const_i32(*i),
         }
     }
+    /// Returns `true` only when this operand is a wasm local that compares
+    /// equal to `other_local`; an immediate operand never matches.
+    pub fn eq_local(&self, other_local: &WasmLocal) -> bool {
+        match *self {
+            LocalOrImmediate::Immediate(_) => false,
+            LocalOrImmediate::WasmLocal(own_local) => own_local == other_local,
+        }
+    }
+    /// Returns `true` only when this operand is the immediate constant 0.
+    /// A wasm local is never reported as zero, whatever it holds at runtime.
+    pub fn is_zero(&self) -> bool {
+        match *self {
+            LocalOrImmediate::Immediate(value) => value == 0,
+            LocalOrImmediate::WasmLocal(_) => false,
+        }
+    }
 }
 
 pub fn jit_instruction(ctx: &mut JitContext, instr_flags: &mut u32) {
@@ -882,9 +894,14 @@ fn gen_cmp(
     size: i32,
 ) {
     builder.const_i32(global_pointers::last_result as i32);
-    builder.get_local(&dest_operand);
-    source_operand.gen_get(builder);
-    builder.sub_i32();
+    if source_operand.is_zero() {
+        builder.get_local(&dest_operand);
+    }
+    else {
+        builder.get_local(&dest_operand);
+        source_operand.gen_get(builder);
+        builder.sub_i32();
+    }
     if size == OPSIZE_8 || size == OPSIZE_16 {
         builder.const_i32(if size == OPSIZE_8 { 0xFF } else { 0xFFFF });
         builder.and_i32();
@@ -959,9 +976,14 @@ fn gen_test(
     size: i32,
 ) {
     builder.const_i32(global_pointers::last_result as i32);
-    builder.get_local(&dest_operand);
-    source_operand.gen_get(builder);
-    builder.and_i32();
+    if source_operand.eq_local(dest_operand) {
+        builder.get_local(&dest_operand);
+    }
+    else {
+        builder.get_local(&dest_operand);
+        source_operand.gen_get(builder);
+        builder.and_i32();
+    }
     builder.store_aligned_i32(0);
 
     codegen::gen_set_last_op_size(builder, size);
@@ -1005,10 +1027,19 @@ fn gen_xor32(
     dest_operand: &WasmLocal,
     source_operand: &LocalOrImmediate,
 ) {
-    builder.get_local(&dest_operand);
-    source_operand.gen_get(builder);
-    builder.xor_i32();
-    builder.set_local(dest_operand);
+    if source_operand.eq_local(dest_operand) {
+        builder.const_i32(0);
+        builder.set_local(dest_operand);
+    // TODO:
+    // - Set last_result to zero rather than reading from local
+    // - Skip setting opsize (not relevant for SF, ZF, and PF on zero)
+    }
+    else {
+        builder.get_local(&dest_operand);
+        source_operand.gen_get(builder);
+        builder.xor_i32();
+        builder.set_local(dest_operand);
+    }
 
     codegen::gen_set_last_result(builder, &dest_operand);
     codegen::gen_set_last_op_size(builder, OPSIZE_32);
@@ -2518,9 +2549,11 @@ pub fn instr16_8D_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, reg: u32)
     codegen::gen_set_reg16(ctx, reg);
 }
 pub fn instr32_8D_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, reg: u32) {
-    ctx.cpu.prefixes |= SEG_PREFIX_ZERO;
-    codegen::gen_modrm_resolve(ctx, modrm_byte);
-    codegen::gen_set_reg32(ctx, reg);
+    if !modrm_byte.is_nop(reg) { // emit nothing at all for the nop form (see ModrmByte::is_nop)
+        ctx.cpu.prefixes |= SEG_PREFIX_ZERO;
+        codegen::gen_modrm_resolve(ctx, modrm_byte);
+        codegen::gen_set_reg32(ctx, reg);
+    }
 }
 
 pub fn instr16_8D_reg_jit(ctx: &mut JitContext, _r1: u32, _r2: u32) {

+ 9 - 0
src/rust/modrm.rs

@@ -16,6 +16,15 @@ pub struct ModrmByte {
     immediate: i32,
     is_16: bool,
 }
+impl ModrmByte {
+    /// Returns `true` when this operand consists of nothing but `reg`:
+    /// the first register is `reg`, there is no second register, the shift
+    /// and the immediate are both zero, and the `is_16` flag is not set.
+    /// The 32-bit `lea` code generator uses this to recognise a
+    /// `lea reg, [reg]` style nop.
+    pub fn is_nop(&self, reg: u32) -> bool {
+        let first_is_reg = self.first_reg == Some(reg);
+        let no_second_reg = self.second_reg.is_none();
+        let no_offset = self.shift == 0 && self.immediate == 0;
+        // NOTE(review): is_16 presumably marks operands decoded with 16-bit
+        // addressing; confirm against decode16
+        first_is_reg && no_second_reg && no_offset && !self.is_16
+    }
+}
 
 pub fn decode(ctx: &mut CpuContext, modrm_byte: u8) -> ModrmByte {
     if ctx.asize_32() { decode32(ctx, modrm_byte) } else { decode16(ctx, modrm_byte) }

+ 1 - 0
src/rust/wasmgen/wasm_builder.rs

@@ -73,6 +73,7 @@ pub struct WasmBuilder {
     pub arg_local_initial_state: WasmLocal,
 }
 
+#[derive(Eq, PartialEq)] // equality is what lets LocalOrImmediate::eq_local compare two locals
 pub struct WasmLocal(u8);
 impl WasmLocal {
     pub fn idx(&self) -> u8 { self.0 }

+ 17 - 0
tests/nasm/arith-optimisations.asm

@@ -0,0 +1,17 @@
+global _start
+
+%include "header.inc"
+
+    xor eax, eax                ; both operands are the same register
+    pushf                       ; save the resulting flags on the stack
+    and dword [esp], 8ffh       ; keep only bits 0-7 and bit 11 of the saved flags (CF/PF/AF/ZF/SF and OF)
+
+    test ebx, ebx               ; both operands are the same register
+    pushf                       ; save the resulting flags on the stack
+    and dword [esp], 8ffh       ; same mask as above
+
+    cmp ecx, 0                  ; compare against an immediate zero
+    pushf                       ; save the resulting flags on the stack
+    and dword [esp], 8ffh       ; same mask as above
+
+%include "footer.inc"

+ 17 - 0
tests/nasm/lea-nop.asm

@@ -0,0 +1,17 @@
+global _start
+
+%include "header.inc"
+
+    ; nops
+    lea edx, [edx]                          ; destination is the only address register, no offset
+    db 8Dh, 40h, 00h                        ; raw bytes for lea eax, [eax+0] with an 8-bit zero displacement (assuming 32-bit mode)
+    db 8Dh, 0B6h, 00h, 00h, 00h, 00h        ; raw bytes for lea esi, [esi+0] with a 32-bit zero displacement
+    db 8Dh, 0BCh, 27h, 00h, 00h, 00h, 00h   ; raw bytes for lea edi, [edi+0] using a SIB byte with no index register
+
+    ; non-nops, but similar encodings
+    lea eax, [bx+si]                        ; 16-bit address registers, and two of them
+    lea cx, [bx+di]                         ; 16-bit destination with 16-bit address registers
+    lea edx, [edx+42]                       ; same register on both sides, but a non-zero displacement
+    lea ebp, [ebp*2]                        ; same register on both sides, but scaled
+
+%include "footer.inc"