Browse Source

generate code for pshufd (660F70)

Fabian 1 year ago
parent
commit
baffa6c0d6
2 changed files with 17 additions and 3 deletions
  1. 0 1
      src/rust/cpu/instructions_0f.rs
  2. 17 2
      src/rust/jit_instructions.rs

+ 0 - 1
src/rust/cpu/instructions_0f.rs

@@ -2635,7 +2635,6 @@ pub unsafe fn instr_0F70_reg(r1: i32, r2: i32, imm: i32) { instr_0F70(read_mmx64
 // Memory-operand form of opcode 0F 70: reads a 64-bit value from `addr` via
 // `safe_read64s` and forwards it, together with `r` and `imm`, to `instr_0F70`.
 // NOTE(review): `return_on_pagefault!` presumably returns early from this
 // function when the read faults -- confirm against the macro definition.
 pub unsafe fn instr_0F70_mem(addr: i32, r: i32, imm: i32) {
     instr_0F70(return_on_pagefault!(safe_read64s(addr)), r, imm);
 }
-#[no_mangle]
 pub unsafe fn instr_660F70(source: reg128, r: i32, imm8: i32) {
     // pshufd xmm, xmm/mem128, imm8
     // XXX: Aligned access or #gp

+ 17 - 2
src/rust/jit_instructions.rs

@@ -6456,10 +6456,25 @@ pub fn instr_0F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) {
     ctx.builder.call_fn3_i64_i32_i32("instr_0F70");
 }
 // JIT code generator for `pshufd xmm, mem128, imm8` (opcode 66 0F 70).
 //
 // The 128-bit memory operand addressed by `modrm_byte` is first read into the
 // SSE scratch register. Then, for each destination dword `i` in 0..4, a 32-bit
 // load from scratch dword `(imm8 >> (2 * i)) & 3` is emitted, followed by a
 // store into dword `i` of xmm register `r`.
 //
 // Precedence note: in Rust `*` binds tighter than `>>`, which binds tighter
 // than `&`, so `imm8 >> 2 * i & 3` parses as `(imm8 >> (2 * i)) & 3`.
 // `imm8` is a translation-time constant, so every offset is fixed in the
 // emitted code.
 pub fn instr_660F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32, imm8: u32) {
-    sse_read128_xmm_mem_imm(ctx, "instr_660F70", modrm_byte, r, imm8)
+    let src = global_pointers::sse_scratch_register as u32;
+    codegen::gen_modrm_resolve_safe_read128(ctx, modrm_byte, src);
+    for i in 0..4 {
+        ctx.builder.const_i32(0);
+        ctx.builder.load_fixed_i32(src + 4 * (imm8 >> 2 * i & 3));
+        ctx.builder
+            .store_aligned_i32(global_pointers::get_reg_xmm_offset(r) + 4 * i);
+    }
 }
 // JIT code generator for the register form `pshufd xmm, xmm, imm8`
 // (opcode 66 0F 70). `r1` is the source register, `r2` the destination.
 //
 // The source register is copied into the SSE scratch register before any
 // destination dword is written; destination dword `i` of `r2` then receives
 // scratch dword `(imm8 >> (2 * i)) & 3`.
 // NOTE(review): reading from the scratch copy rather than from `r1` directly
 // appears to be what keeps the result correct when `r1 == r2` (see the TODO
 // about aliased src/dst) -- confirm.
 //
 // Precedence note: `imm8 >> 2 * i & 3` parses as `(imm8 >> (2 * i)) & 3`.
 pub fn instr_660F70_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32, imm8: u32) {
-    sse_read128_xmm_xmm_imm(ctx, "instr_660F70", r1, r2, imm8)
+    codegen::gen_read_reg_xmm128_into_scratch(ctx, r1);
+    // TODO: perf: copy less (handle aliased src/dst), use 64-bit loads/stores if possible
+    let src = global_pointers::sse_scratch_register as u32;
+    for i in 0..4 {
+        ctx.builder.const_i32(0);
+        ctx.builder.load_fixed_i32(src + 4 * (imm8 >> 2 * i & 3));
+        ctx.builder
+            .store_aligned_i32(global_pointers::get_reg_xmm_offset(r2) + 4 * i);
+    }
 }
 pub fn instr_F20F70_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte, r: u32, imm8: u32) {
     sse_read128_xmm_mem_imm(ctx, "instr_F20F70", modrm_byte, r, imm8)