Browse Source

jit: generate code for sse moves (f20f1[01], 660f6e, f30f7e, 660fd6)

Fabian 1 year ago
parent
commit
4932c821e8
2 changed files with 46 additions and 27 deletions
  1. 9 7
      src/rust/cpu/instructions_0f.rs
  2. 37 20
      src/rust/jit_instructions.rs

+ 9 - 7
src/rust/cpu/instructions_0f.rs

@@ -491,7 +491,6 @@ pub unsafe fn instr_660F10_reg(r1: i32, r2: i32) { instr_660F10(read_xmm128s(r1)
 pub unsafe fn instr_660F10_mem(addr: i32, r: i32) {
     instr_660F10(return_on_pagefault!(safe_read128s(addr)), r);
 }
-#[no_mangle]
 pub unsafe fn instr_F20F10_reg(r1: i32, r2: i32) {
     // movsd xmm, xmm/m64
     let data = read_xmm128s(r1);
@@ -528,7 +527,6 @@ pub unsafe fn instr_660F11_mem(addr: i32, r: i32) {
     // movupd xmm/m128, xmm
     mov_r_m128(addr, r);
 }
-#[no_mangle]
 pub unsafe fn instr_F20F11_reg(r1: i32, r2: i32) {
     // movsd xmm/m64, xmm
     let data = read_xmm128s(r2);
@@ -563,6 +561,7 @@ pub unsafe fn instr_F20F12_reg(_r1: i32, _r2: i32) { unimplemented_sse(); }
 pub unsafe fn instr_F30F12_mem(_addr: i32, _r: i32) { unimplemented_sse(); }
 #[no_mangle]
 pub unsafe fn instr_F30F12_reg(_r1: i32, _r2: i32) { unimplemented_sse(); }
+
 pub unsafe fn instr_0F13_mem(addr: i32, r: i32) {
     // movlps m64, xmm
     movl_r128_m64(addr, r);
@@ -573,6 +572,7 @@ pub unsafe fn instr_660F13_mem(addr: i32, r: i32) {
     // movlpd xmm/m64, xmm
     movl_r128_m64(addr, r);
 }
+
 #[no_mangle]
 pub unsafe fn instr_0F14(source: u64, r: i32) {
     // unpcklps xmm, xmm/m128
@@ -835,6 +835,7 @@ pub unsafe fn instr_0F25() { undefined_instruction(); }
 pub unsafe fn instr_0F26() { undefined_instruction(); }
 #[no_mangle]
 pub unsafe fn instr_0F27() { undefined_instruction(); }
+
 pub unsafe fn instr_0F28(source: reg128, r: i32) {
     // movaps xmm, xmm/m128
     // XXX: Aligned read or #gp
@@ -2571,6 +2572,7 @@ pub unsafe fn instr_660F6D_reg(r1: i32, r2: i32) { instr_660F6D(read_xmm128s(r1)
 pub unsafe fn instr_660F6D_mem(addr: i32, r: i32) {
     instr_660F6D(return_on_pagefault!(safe_read128s(addr)), r);
 }
+
 #[no_mangle]
 pub unsafe fn instr_0F6E(source: i32, r: i32) {
     // movd mm, r/m32
@@ -2581,7 +2583,6 @@ pub unsafe fn instr_0F6E_reg(r1: i32, r2: i32) { instr_0F6E(read_reg32(r1), r2);
 pub unsafe fn instr_0F6E_mem(addr: i32, r: i32) {
     instr_0F6E(return_on_pagefault!(safe_read32s(addr)), r);
 }
-#[no_mangle]
 pub unsafe fn instr_660F6E(source: i32, r: i32) {
     // movd xmm, r/m32
     write_xmm128(r, source, 0, 0, 0);
@@ -2939,6 +2940,7 @@ pub unsafe fn instr_0F7B() { unimplemented_sse(); }
 pub unsafe fn instr_0F7C() { unimplemented_sse(); }
 #[no_mangle]
 pub unsafe fn instr_0F7D() { unimplemented_sse(); }
+
 #[no_mangle]
 pub unsafe fn instr_0F7E(r: i32) -> i32 {
     // movd r/m32, mm
@@ -2964,7 +2966,6 @@ pub unsafe fn instr_F30F7E_mem(addr: i32, r: i32) {
     let data = return_on_pagefault!(safe_read64s(addr));
     write_xmm128_2(r, data, 0);
 }
-#[no_mangle]
 pub unsafe fn instr_F30F7E_reg(r1: i32, r2: i32) {
     // movq xmm, xmm/mem64
     write_xmm128_2(r2, read_xmm64s(r1), 0);
@@ -3981,6 +3982,7 @@ pub unsafe fn instr_660FD5_reg(r1: i32, r2: i32) { instr_660FD5(read_xmm128s(r1)
 pub unsafe fn instr_660FD5_mem(addr: i32, r: i32) {
     instr_660FD5(return_on_pagefault!(safe_read128s(addr)), r);
 }
+
 #[no_mangle]
 pub unsafe fn instr_0FD6_mem(_addr: i32, _r: i32) { trigger_ud(); }
 #[no_mangle]
@@ -3989,12 +3991,11 @@ pub unsafe fn instr_660FD6_mem(addr: i32, r: i32) {
     // movq xmm/m64, xmm
     movl_r128_m64(addr, r);
 }
-#[no_mangle]
 pub unsafe fn instr_660FD6_reg(r1: i32, r2: i32) {
     // movq xmm/m64, xmm
-    let data = read_xmm64s(r2);
-    write_xmm128_2(r1, data, 0);
+    write_xmm128_2(r1, read_xmm64s(r2), 0);
 }
+
 #[no_mangle]
 pub unsafe fn instr_F20FD6_mem(_addr: i32, _r: i32) { trigger_ud(); }
 #[no_mangle]
@@ -4012,6 +4013,7 @@ pub unsafe fn instr_F30FD6_reg(r1: i32, r2: i32) {
     write_xmm_reg128(r2, reg128 { u64: [source, 0] });
     transition_fpu_to_mmx();
 }
+
 pub unsafe fn instr_0FD7_mem(_addr: i32, _r: i32) { trigger_ud(); }
 #[no_mangle]
 pub unsafe fn instr_0FD7(r1: i32) -> i32 {

+ 37 - 20
src/rust/jit_instructions.rs

@@ -5688,9 +5688,11 @@ pub fn instr_F20F10_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32)
     instr_F30F7E_mem_jit(ctx, modrm_byte, r)
 }
 pub fn instr_F20F10_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
-    ctx.builder.const_i32(r1 as i32);
-    ctx.builder.const_i32(r2 as i32);
-    ctx.builder.call_fn2("instr_F20F10_reg");
+    ctx.builder.const_i32(0);
+    ctx.builder
+        .load_fixed_i64(global_pointers::get_reg_xmm_offset(r1));
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r2));
 }
 pub fn instr_F30F10_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) {
     instr_660F6E_mem_jit(ctx, modrm_byte, r)
@@ -5715,9 +5717,11 @@ pub fn instr_F20F11_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32)
     instr_660FD6_mem_jit(ctx, modrm_byte, r)
 }
 pub fn instr_F20F11_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
-    ctx.builder.const_i32(r1 as i32);
-    ctx.builder.const_i32(r2 as i32);
-    ctx.builder.call_fn2("instr_F20F11_reg");
+    ctx.builder.const_i32(0);
+    ctx.builder
+        .load_fixed_i64(global_pointers::get_reg_xmm_offset(r2));
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r1));
 }
 pub fn instr_F30F11_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) {
     instr_660F7E_mem_jit(ctx, modrm_byte, r)
@@ -6494,14 +6498,26 @@ pub fn instr_0F6E_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
 }
 
 pub fn instr_660F6E_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) {
+    ctx.builder.const_i32(0);
     codegen::gen_modrm_resolve_safe_read32(ctx, modrm_byte);
-    ctx.builder.const_i32(r as i32);
-    ctx.builder.call_fn2("instr_660F6E")
+    ctx.builder.extend_unsigned_i32_to_i64();
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r));
+    ctx.builder.const_i32(0);
+    ctx.builder.const_i64(0);
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r) + 8);
 }
 pub fn instr_660F6E_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    ctx.builder.const_i32(0);
     codegen::gen_get_reg32(ctx, r1);
-    ctx.builder.const_i32(r2 as i32);
-    ctx.builder.call_fn2("instr_660F6E")
+    ctx.builder.extend_unsigned_i32_to_i64();
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r2));
+    ctx.builder.const_i32(0);
+    ctx.builder.const_i64(0);
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r2) + 8);
 }
 
 pub fn instr_0F6F_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) {
@@ -6824,9 +6840,7 @@ pub fn instr_F30F7E_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32)
     ctx.builder.store_aligned_i64(0);
 }
 pub fn instr_F30F7E_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
-    ctx.builder.const_i32(r1 as i32);
-    ctx.builder.const_i32(r2 as i32);
-    ctx.builder.call_fn2("instr_F30F7E_reg");
+    instr_660FD6_reg_jit(ctx, r2, r1)
 }
 
 pub fn instr_660F7F_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) {
@@ -7313,20 +7327,23 @@ pub fn instr_660FD5_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
 pub fn instr_660FD6_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32) {
     codegen::gen_modrm_resolve(ctx, modrm_byte);
     let address_local = ctx.builder.set_new_local();
-
     ctx.builder
-        .const_i32(global_pointers::get_reg_xmm_offset(r) as i32);
-    ctx.builder.load_aligned_i64(0);
+        .load_fixed_i64(global_pointers::get_reg_xmm_offset(r));
     let value_local = ctx.builder.set_new_local_i64();
-
     codegen::gen_safe_write64(ctx, &address_local, &value_local);
     ctx.builder.free_local(address_local);
     ctx.builder.free_local_i64(value_local);
 }
 pub fn instr_660FD6_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
-    ctx.builder.const_i32(r1 as i32);
-    ctx.builder.const_i32(r2 as i32);
-    ctx.builder.call_fn2("instr_660FD6_reg");
+    ctx.builder.const_i32(0);
+    ctx.builder
+        .load_fixed_i64(global_pointers::get_reg_xmm_offset(r2));
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r1));
+    ctx.builder.const_i32(0);
+    ctx.builder.const_i64(0);
+    ctx.builder
+        .store_aligned_i64(global_pointers::get_reg_xmm_offset(r1) + 8);
 }
 
 pub fn instr_660FD7_mem_jit(ctx: &mut JitContext, _modrm_byte: ModrmByte, _r: u32) {