Selaa lähdekoodia

Optimize push/pop JIT instructions to not check stack_size_32

We now generate a variant of each push/pop instruction that is specialized for
the current value of stack_size_32, since that state tends not to change much.
If it does change, state_flags won't match the output of
pack_current_state_flags and the cache entry will therefore be invalidated.
Amaan Cheval 6 vuotta sitten
vanhempi
commit
3512d34314

+ 16 - 16
gen/x86_table.js

@@ -49,23 +49,23 @@ const encodings = [
     { opcode: 0x4E, nonfaulting: 1, os: 1, },
     { opcode: 0x4F, nonfaulting: 1, os: 1, },
 
-    { opcode: 0x50, os: 1, },
-    { opcode: 0x51, os: 1, },
-    { opcode: 0x52, os: 1, },
-    { opcode: 0x53, os: 1, },
-    { opcode: 0x54, os: 1, },
-    { opcode: 0x55, os: 1, },
-    { opcode: 0x56, os: 1, },
-    { opcode: 0x57, os: 1, },
-
-    { opcode: 0x58, os: 1, },
-    { opcode: 0x59, os: 1, },
-    { opcode: 0x5A, os: 1, },
-    { opcode: 0x5B, os: 1, },
+    { opcode: 0x50, custom: 1, os: 1, },
+    { opcode: 0x51, custom: 1, os: 1, },
+    { opcode: 0x52, custom: 1, os: 1, },
+    { opcode: 0x53, custom: 1, os: 1, },
+    { opcode: 0x54, custom: 1, os: 1, },
+    { opcode: 0x55, custom: 1, os: 1, },
+    { opcode: 0x56, custom: 1, os: 1, },
+    { opcode: 0x57, custom: 1, os: 1, },
+
+    { opcode: 0x58, custom: 1, os: 1, },
+    { opcode: 0x59, custom: 1, os: 1, },
+    { opcode: 0x5A, custom: 1, os: 1, },
+    { opcode: 0x5B, custom: 1, os: 1, },
     { opcode: 0x5C, os: 1, },
-    { opcode: 0x5D, os: 1, },
-    { opcode: 0x5E, os: 1, },
-    { opcode: 0x5F, os: 1, },
+    { opcode: 0x5D, custom: 1, os: 1, },
+    { opcode: 0x5E, custom: 1, os: 1, },
+    { opcode: 0x5F, custom: 1, os: 1, },
 
     { opcode: 0x60, os: 1, },
     { opcode: 0x61, os: 1, },

+ 17 - 0
src/native/codegen/codegen.c

@@ -205,6 +205,15 @@ void gen_fn0(char const* fn, uint8_t fn_len)
     call_fn(&instruction_body, fn_idx);
 }
 
+void gen_fn0_store_ret(char const* fn, uint8_t fn_len, int32_t *ptr)
+{
+    // generates: *ptr = fn()
+    //
+    // fn, fn_len: name of the function to call and the length of that name;
+    //             the function is looked up with FN0_RET_TYPE_INDEX.
+    // ptr:        address the generated code stores fn's return value to.
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN0_RET_TYPE_INDEX);
+    // The destination address is emitted before the call so that the store
+    // emitted last has both its address and its value available.
+    // NOTE(review): the pointer is narrowed to int32_t here, which assumes
+    // 32-bit addresses (e.g. a wasm32 build) - confirm.
+    push_i32(&instruction_body, (int32_t) ptr);
+    call_fn(&instruction_body, fn_idx);
+    // NOTE(review): store_i32 presumably writes a full 32 bits at ptr, so a
+    // caller that passes the address of a narrower object (pop16_jit casts a
+    // uint16_t* to int32_t*) would also overwrite the bytes after it - confirm.
+    store_i32(&instruction_body);
+}
+
 void gen_fn1(char const* fn, uint8_t fn_len, int32_t arg0)
 {
     int32_t fn_idx = get_fn_index(fn, fn_len, FN1_TYPE_INDEX);
@@ -212,6 +221,14 @@ void gen_fn1(char const* fn, uint8_t fn_len, int32_t arg0)
     call_fn(&instruction_body, fn_idx);
 }
 
+void gen_fn1_ptr(char const* fn, uint8_t fn_len, int32_t *ptr0)
+{
+    // generates: fn(*ptr0)
+    //
+    // fn, fn_len: name of the function to call and the length of that name;
+    //             the function is looked up with FN1_TYPE_INDEX.
+    // ptr0:       address of the argument. Unlike gen_fn1, which takes the
+    //             argument value itself, the value is read through ptr0 by
+    //             the generated code, not while generating it.
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN1_TYPE_INDEX);
+    // NOTE(review): load_i32 presumably reads a full 32 bits from ptr0, and the
+    // pointer is narrowed to int32_t (assumes 32-bit addresses) - confirm.
+    load_i32(&instruction_body, (int32_t) ptr0);
+    call_fn(&instruction_body, fn_idx);
+}
+
 void gen_fn2(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1)
 {
     int32_t fn_idx = get_fn_index(fn, fn_len, FN2_TYPE_INDEX);

+ 2 - 0
src/native/codegen/codegen.h

@@ -29,7 +29,9 @@ static uint32_t import_table_size_reset_value;
 static uint32_t initial_import_count;
 
 void gen_fn0(char const* fn, uint8_t fn_len);
+void gen_fn0_store_ret(char const* fn, uint8_t fn_len, int32_t *ptr); // generates: *ptr = fn()
 void gen_fn1(char const* fn, uint8_t fn_len, int32_t arg0);
+void gen_fn1_ptr(char const* fn, uint8_t fn_len, int32_t *ptr0); // generates: fn(*ptr0)
 void gen_fn2(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1);
 
 void gen_fn0_ret(char const* fn, uint8_t fn_len);

+ 1 - 1
src/native/cpu.c

@@ -578,7 +578,7 @@ static void jit_run_interpreted(int32_t phys_addr)
 
 static cached_state_flags pack_current_state_flags()
 {
-    return *is_32 << 0;
+    // Packs the CPU state that generated code is specialized for into one
+    // value: bit 0 = *is_32, bit 1 = *stack_size_32.
+    // `<<` binds tighter than `|`, so this evaluates as
+    // (*is_32 << 0) | (*stack_size_32 << 1).
+    // NOTE(review): the layout assumes both flags only ever hold 0 or 1 -
+    // confirm against their declarations.
+    return *is_32 << 0 | *stack_size_32 << 1;
 }
 
 static struct code_cache* create_cache_entry(uint32_t phys_addr)

+ 32 - 0
src/native/instructions.c

@@ -246,6 +246,38 @@ void instr32_5E() { reg32s[ESI] = pop32s(); }
 void instr16_5F() { reg16[DI] = pop16(); }
 void instr32_5F() { reg32s[EDI] = pop32s(); }
 
+// Code generators for opcodes 0x50-0x57 (push of a 16-bit or 32-bit register).
+// Each one passes the ADDRESS of the register slot to push16_jit/push32_jit,
+// so the register is read by the generated code when it runs, not here.
+void instr16_50_jit() { push16_jit(&reg16[AX]); }
+void instr32_50_jit() { push32_jit(&reg32s[EAX]); }
+void instr16_51_jit() { push16_jit(&reg16[CX]); }
+void instr32_51_jit() { push32_jit(&reg32s[ECX]); }
+void instr16_52_jit() { push16_jit(&reg16[DX]); }
+void instr32_52_jit() { push32_jit(&reg32s[EDX]); }
+void instr16_53_jit() { push16_jit(&reg16[BX]); }
+void instr32_53_jit() { push32_jit(&reg32s[EBX]); }
+void instr16_54_jit() { push16_jit(&reg16[SP]); }
+void instr32_54_jit() { push32_jit(&reg32s[ESP]); }
+void instr16_55_jit() { push16_jit(&reg16[BP]); }
+void instr32_55_jit() { push32_jit(&reg32s[EBP]); }
+void instr16_56_jit() { push16_jit(&reg16[SI]); }
+void instr32_56_jit() { push32_jit(&reg32s[ESI]); }
+void instr16_57_jit() { push16_jit(&reg16[DI]); }
+void instr32_57_jit() { push32_jit(&reg32s[EDI]); }
+
+// Code generators for opcodes 0x58-0x5B and 0x5D-0x5F (pop into a 16-bit or
+// 32-bit register). Each one passes the address of the destination register
+// slot to pop16_jit/pop32s_jit; the generated code stores the popped value there.
+// Opcode 0x5C (pop into SP/ESP) has no _jit variant here, matching
+// gen/x86_table.js where 0x5C is not marked `custom`.
+void instr16_58_jit() { pop16_jit(&reg16[AX]); }
+void instr32_58_jit() { pop32s_jit(&reg32s[EAX]); }
+void instr16_59_jit() { pop16_jit(&reg16[CX]); }
+void instr32_59_jit() { pop32s_jit(&reg32s[ECX]); }
+void instr16_5A_jit() { pop16_jit(&reg16[DX]); }
+void instr32_5A_jit() { pop32s_jit(&reg32s[EDX]); }
+void instr16_5B_jit() { pop16_jit(&reg16[BX]); }
+void instr32_5B_jit() { pop32s_jit(&reg32s[EBX]); }
+void instr16_5D_jit() { pop16_jit(&reg16[BP]); }
+void instr32_5D_jit() { pop32s_jit(&reg32s[EBP]); }
+void instr16_5E_jit() { pop16_jit(&reg16[SI]); }
+void instr32_5E_jit() { pop32s_jit(&reg32s[ESI]); }
+void instr16_5F_jit() { pop16_jit(&reg16[DI]); }
+void instr32_5F_jit() { pop32s_jit(&reg32s[EDI]); }
+
 
 void instr16_60() { pusha16(); }
 void instr32_60() { pusha32(); }

+ 49 - 0
src/native/misc_instr.c

@@ -5,6 +5,7 @@
 
 #include <stdio.h>
 
+#include "codegen/codegen.h"
 #include "const.h"
 #include "global_pointers.h"
 #include "fpu.h"
@@ -273,6 +274,18 @@ void push16(int32_t imm16)
     }
 }
 
+void push16_jit(uint16_t *ptr)
+{
+    // Code generator for a 16-bit push of the value stored at ptr.
+    // *stack_size_32 is tested here, while generating code, so the emitted
+    // call goes directly to the matching push16_ss32/push16_ss16 variant.
+    // The literal 11 is the length of the function name passed with it.
+    //
+    // NOTE(review): ptr is cast to int32_t* and gen_fn1_ptr emits load_i32, so
+    // the generated code appears to read 32 bits from a 16-bit location;
+    // presumably push16_ss16/push16_ss32 only use the low 16 bits - confirm.
+    if(*stack_size_32)
+    {
+        gen_fn1_ptr("push16_ss32", 11, (int32_t*) ptr);
+    }
+    else
+    {
+        gen_fn1_ptr("push16_ss16", 11, (int32_t*) ptr);
+    }
+}
+
 __attribute__((always_inline))
 void push32_ss16(int32_t imm32)
 {
@@ -302,6 +315,18 @@ void push32(int32_t imm32)
     }
 }
 
+void push32_jit(int32_t *ptr)
+{
+    // Code generator for a 32-bit push of the value stored at ptr.
+    // *stack_size_32 is tested here, while generating code, so the emitted
+    // call goes directly to the matching push32_ss32/push32_ss16 variant.
+    // The literal 11 is the length of the function name passed with it.
+    if(*stack_size_32)
+    {
+        gen_fn1_ptr("push32_ss32", 11, ptr);
+    }
+    else
+    {
+        gen_fn1_ptr("push32_ss16", 11, ptr);
+    }
+}
+
 __attribute__((always_inline))
 int32_t pop16_ss16()
 {
@@ -335,6 +360,18 @@ int32_t pop16()
     }
 }
 
+void pop16_jit(uint16_t *ptr)
+{
+    // Code generator for a 16-bit pop whose result is stored at ptr.
+    // *stack_size_32 is tested here, while generating code, so the emitted
+    // call goes directly to the matching pop16_ss32/pop16_ss16 variant.
+    // The literal 10 is the length of the function name passed with it.
+    //
+    // NOTE(review): ptr is cast to int32_t* and gen_fn0_store_ret emits
+    // store_i32, so the generated code appears to write 32 bits through a
+    // pointer to a 16-bit slot. If the two bytes after *ptr hold live data
+    // (e.g. the upper half of a 32-bit register that the 16-bit register
+    // aliases), they would be overwritten - confirm, and consider emitting a
+    // 16-bit store instead.
+    if(*stack_size_32)
+    {
+        gen_fn0_store_ret("pop16_ss32", 10, (int32_t*) ptr);
+    }
+    else
+    {
+        gen_fn0_store_ret("pop16_ss16", 10, (int32_t*) ptr);
+    }
+}
+
 __attribute__((always_inline))
 int32_t pop32s_ss16()
 {
@@ -366,6 +403,18 @@ int32_t pop32s()
     }
 }
 
+void pop32s_jit(int32_t *ptr)
+{
+    // Code generator for a 32-bit pop whose result is stored at ptr.
+    // *stack_size_32 is tested here, while generating code, so the emitted
+    // call goes directly to the matching pop32s_ss32/pop32s_ss16 variant.
+    // The literal 11 is the length of the function name passed with it.
+    if(*stack_size_32)
+    {
+        gen_fn0_store_ret("pop32s_ss32", 11, ptr);
+    }
+    else
+    {
+        gen_fn0_store_ret("pop32s_ss16", 11, ptr);
+    }
+}
+
 void pusha16()
 {
     uint16_t temp = reg16[SP];

+ 4 - 0
src/native/misc_instr.h

@@ -44,13 +44,17 @@ void adjust_stack_reg(int32_t adjustment);
 void push16_ss16(int32_t imm16);
 void push16_ss32(int32_t imm16);
 void push16(int32_t imm16);
+void push16_jit(uint16_t *ptr); // code generator: emits a call to push16_ss16/push16_ss32 with *ptr
 void push32_ss16(int32_t imm32);
 void push32_ss32(int32_t imm32);
 void push32(int32_t imm32);
+void push32_jit(int32_t *ptr); // code generator: emits a call to push32_ss16/push32_ss32 with *ptr
 int32_t pop16(void);
+void pop16_jit(uint16_t *ptr); // code generator: emits *ptr = pop16_ss16()/pop16_ss32()
 int32_t pop32_ss16(void); // NOTE(review): misc_instr.c defines pop32s_ss16 (with an 's') - name mismatch? confirm
 int32_t pop32_ss32(void); // NOTE(review): pop32s_jit emits calls to "pop32s_ss32" - name mismatch? confirm
 int32_t pop32s(void);
+void pop32s_jit(int32_t *ptr); // code generator: emits *ptr = pop32s_ss16()/pop32s_ss32()
 void pusha16(void);
 void pusha32(void);
 void setcc_reg(bool condition, int32_t r);