Improved code generation for jitted memory reads and writes

Fabian, 3 years ago
commit d31e0edb5a
4 changed files with 696 additions and 810 deletions:
  1. Makefile (+5 -1)
  2. src/rust/codegen.rs (+330 -364)
  3. src/rust/cpu2/cpu.rs (+181 -258)
  4. src/rust/wasmgen/wasm_builder.rs (+180 -187)

+ 5 - 1
Makefile

@@ -275,9 +275,13 @@ devices-test: all-debug
 	./tests/devices/virtio_9p.js
 
 rust-test: $(RUST_FILES)
-	env RUST_BACKTRACE=full RUST_TEST_THREADS=1 RUSTFLAGS="-D warnings" cargo +nightly test -- --nocapture
+	# RUSTFLAGS="-D warnings"
+	env RUST_BACKTRACE=full RUST_TEST_THREADS=1 cargo +nightly test -- --nocapture
 	./tests/rust/verify-wasmgen-dummy-output.js
 
+rust-no-warnings:
+	RUSTFLAGS="-D warnings" make all all-debug
+
 rust-test-intensive:
 	QUICKCHECK_TESTS=100000000 make rust-test
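Usage note: the new target builds all and all-debug with warnings treated as errors, standing in for the -D warnings flag removed from rust-test above:

    make rust-no-warnings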
 

+ 330 - 364
src/rust/codegen.rs

@@ -9,8 +9,7 @@ use jit_instructions::LocalOrImmedate;
 use modrm;
 use profiler;
 use regs;
-use wasmgen::wasm_builder;
-use wasmgen::wasm_builder::{WasmBuilder, WasmLocal, WasmLocalI64};
+use wasmgen::wasm_builder::{FunctionType, WasmBuilder, WasmLocal, WasmLocalI64};
 
 const CONDITION_FUNCTIONS: [&str; 16] = [
     "test_o", "test_no", "test_b", "test_nb", "test_z", "test_nz", "test_be", "test_nbe", "test_s",
@@ -211,53 +210,47 @@ pub fn sign_extend_i16(builder: &mut WasmBuilder) {
 }
 
 pub fn gen_fn0_const(builder: &mut WasmBuilder, name: &str) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN0_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN0_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_fn0_const_ret(builder: &mut WasmBuilder, name: &str) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN0_RET_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN0_RET_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_fn1_const(builder: &mut WasmBuilder, name: &str, arg0: u32) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_TYPE_INDEX);
     builder.const_i32(arg0 as i32);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn1_ret(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_RET_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_RET_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn1_ret_f64(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_RET_F64_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_RET_F64_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn1_f64_ret_i32(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_F64_RET_I32_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_F64_RET_I32_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn1_f64_ret_i64(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_F64_RET_I64_TYPE_INDEX);
-    builder.call_fn(fn_idx);
-}
-
-pub fn gen_call_fn1_ret_i64(builder: &mut WasmBuilder, name: &str) {
-    // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_RET_I64_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_F64_RET_I64_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_fn2_const(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN2_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN2_TYPE_INDEX);
     builder.const_i32(arg0 as i32);
     builder.const_i32(arg1 as i32);
     builder.call_fn(fn_idx);
@@ -265,58 +258,56 @@ pub fn gen_fn2_const(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32
 
 pub fn gen_call_fn1(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _ ) where _ must be left on the stack before calling this
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn2(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _, _ ) where _ must be left on the stack before calling this
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN2_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN2_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn2_i32_f64(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _, _ ) where _ must be left on the stack before calling this
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN2_I32_F64_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN2_I32_F64_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
-pub fn gen_call_fn2_i32_i64(builder: &mut WasmBuilder, name: &str) {
-    // generates: fn( _, _ ) where _ must be left on the stack before calling this
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN2_I32_I64_TYPE_INDEX);
+pub fn gen_call_fn3_i32_i64_i32_ret(builder: &mut WasmBuilder, name: &str) {
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN3_I32_I64_I32_RET_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn1_f64(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _, _ ) where _ must be left on the stack before calling this
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_F64_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_F64_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn2_ret(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _, _ ) where _ must be left on the stack before calling this, and fn returns a value
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN2_RET_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN2_RET_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn3(builder: &mut WasmBuilder, name: &str) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN3_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN3_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
-pub fn gen_call_fn3_i32_i64_i64(builder: &mut WasmBuilder, name: &str) {
-    // generates: fn( _, _ ) where _ must be left on the stack before calling this
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN3_I32_I64_I64_TYPE_INDEX);
+pub fn gen_call_fn4_i32_i64_i64_i32_ret(builder: &mut WasmBuilder, name: &str) {
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN4_I32_I64_I64_I32_RET_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_call_fn3_ret(builder: &mut WasmBuilder, name: &str) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN3_RET_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN3_RET_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_fn3_const(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32, arg2: u32) {
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN3_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN3_TYPE_INDEX);
     builder.const_i32(arg0 as i32);
     builder.const_i32(arg1 as i32);
     builder.const_i32(arg2 as i32);
@@ -325,20 +316,20 @@ pub fn gen_fn3_const(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32
 
 pub fn gen_modrm_fn0(builder: &mut WasmBuilder, name: &str) {
     // generates: fn( _ )
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN1_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN1_TYPE_INDEX);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_modrm_fn1(builder: &mut WasmBuilder, name: &str, arg0: u32) {
     // generates: fn( _, arg0 )
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN2_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN2_TYPE_INDEX);
     builder.const_i32(arg0 as i32);
     builder.call_fn(fn_idx);
 }
 
 pub fn gen_modrm_fn2(builder: &mut WasmBuilder, name: &str, arg0: u32, arg1: u32) {
     // generates: fn( _, arg0, arg1 )
-    let fn_idx = builder.get_fn_idx(name, wasm_builder::FN3_TYPE_INDEX);
+    let fn_idx = builder.get_fn_idx(name, FunctionType::FN3_TYPE_INDEX);
     builder.const_i32(arg0 as i32);
     builder.const_i32(arg1 as i32);
     builder.call_fn(fn_idx);
@@ -473,27 +464,27 @@ fn gen_safe_read(
     address_local: &WasmLocal,
     where_to_write: Option<u32>,
 ) {
-    // Assumes virtual address has been pushed to the stack, and generates safe_readXX's fast-path
-    // inline, bailing to safe_readXX_slow if necessary
+    // Execute a virtual memory read. All slow paths (memory-mapped IO, tlb miss, page fault and
 +    // read across page boundary) are handled in safe_read_jit_slow
+
+    //   entry <- tlb_data[addr >> 12 << 2]
+    //   if entry & MASK == TLB_VALID && (addr & 0xFFF) <= 0x1000 - bytes: goto fast
+    //   entry <- safe_read_jit_slow(addr, instruction_pointer)
+    //   if page_fault: goto exit-with-pagefault
+    //   fast: mem[(entry & ~0xFFF) ^ addr]
 
+    ctx.builder.block_void();
     ctx.builder.get_local(&address_local);
 
-    // Pseudo: base_on_stack = (uint32_t)address >> 12;
     ctx.builder.const_i32(12);
     ctx.builder.shr_u_i32();
-
-    // scale index
     ctx.builder.const_i32(2);
     ctx.builder.shl_i32();
 
-    // Pseudo: entry = tlb_data[base_on_stack];
     ctx.builder
         .load_aligned_i32_from_stack(global_pointers::TLB_DATA);
     let entry_local = ctx.builder.tee_new_local();
 
-    // Pseudo: bool can_use_fast_path =
-    //    (entry & 0xFFF & ~TLB_READONLY & ~TLB_GLOBAL & ~TLB_HAS_CODE & ~(cpl == 3 ? 0 : TLB_NO_USER) == TLB_VALID &&
-    //    (bitsize == 8 ? true : (address & 0xFFF) <= (0x1000 - (bitsize / 8)));
     ctx.builder.const_i32(
         (0xFFF
             & !TLB_READONLY
@@ -516,19 +507,51 @@ fn gen_safe_read(
         ctx.builder.and_i32();
     }
 
-    // Pseudo:
-    // if(can_use_fast_path) leave_on_stack(mem8[entry & ~0xFFF ^ address]);
-    if bits == BitSize::DQWORD {
-        ctx.builder.if_void();
+    ctx.builder.br_if(0);
+
+    if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
+        ctx.builder.get_local(&address_local);
+        ctx.builder.get_local(&entry_local);
+        gen_call_fn2(ctx.builder, "report_safe_read_jit_slow");
     }
-    else if bits == BitSize::QWORD {
-        ctx.builder.if_i64();
+
+    ctx.builder.get_local(&address_local);
+    ctx.builder
+        .const_i32(ctx.start_of_current_instruction as i32);
+    match bits {
+        BitSize::BYTE => {
+            gen_call_fn2_ret(ctx.builder, "safe_read8_slow_jit");
+        },
+        BitSize::WORD => {
+            gen_call_fn2_ret(ctx.builder, "safe_read16_slow_jit");
+        },
+        BitSize::DWORD => {
+            gen_call_fn2_ret(ctx.builder, "safe_read32s_slow_jit");
+        },
+        BitSize::QWORD => {
+            gen_call_fn2_ret(ctx.builder, "safe_read64s_slow_jit");
+        },
+        BitSize::DQWORD => {
+            gen_call_fn2_ret(ctx.builder, "safe_read128s_slow_jit");
+        },
     }
-    else {
-        ctx.builder.if_i32();
+    ctx.builder.set_local(&entry_local);
+
+    if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
+        ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+        ctx.builder.if_void();
+        gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
+        ctx.builder.block_end();
     }
 
-    gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_FAST);
+    // -2 for the exit-with-pagefault block, +1 for leaving the nested block from this function
+    let br_offset = ctx.current_brtable_depth - 2 + 1;
+    ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+    ctx.builder.br_if(br_offset);
+
+    ctx.builder.block_end();
+
+    gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_FAST); // XXX: Both fast and slow
 
     ctx.builder.get_local(&entry_local);
     ctx.builder.const_i32(!0xFFF);
@@ -539,92 +562,39 @@ fn gen_safe_read(
     // where_to_write is only used by dqword
     dbg_assert!((where_to_write != None) == (bits == BitSize::DQWORD));
 
+    ctx.builder.const_i32(unsafe { mem8 } as i32);
+    ctx.builder.add_i32();
+
     match bits {
         BitSize::BYTE => {
-            ctx.builder.load_u8_from_stack(unsafe { mem8 } as u32);
+            ctx.builder.load_u8_from_stack(0);
         },
         BitSize::WORD => {
-            ctx.builder
-                .load_unaligned_u16_from_stack(unsafe { mem8 } as u32);
+            ctx.builder.load_unaligned_u16_from_stack(0);
         },
         BitSize::DWORD => {
-            ctx.builder
-                .load_unaligned_i32_from_stack(unsafe { mem8 } as u32);
+            ctx.builder.load_unaligned_i32_from_stack(0);
         },
         BitSize::QWORD => {
-            ctx.builder
-                .load_unaligned_i64_from_stack(unsafe { mem8 } as u32);
+            ctx.builder.load_unaligned_i64_from_stack(0);
         },
         BitSize::DQWORD => {
             let where_to_write = where_to_write.unwrap();
             let virt_address_local = ctx.builder.set_new_local();
             ctx.builder.const_i32(0);
             ctx.builder.get_local(&virt_address_local);
-            ctx.builder
-                .load_unaligned_i64_from_stack(unsafe { mem8 } as u32);
+            ctx.builder.load_unaligned_i64_from_stack(0);
             ctx.builder.store_unaligned_i64(where_to_write);
 
             ctx.builder.const_i32(0);
             ctx.builder.get_local(&virt_address_local);
-            ctx.builder
-                .load_unaligned_i64_from_stack(unsafe { mem8 } as u32 + 8);
+            ctx.builder.load_unaligned_i64_from_stack(8);
             ctx.builder.store_unaligned_i64(where_to_write + 8);
 
             ctx.builder.free_local(virt_address_local);
         },
     }
 
-    // Pseudo:
-    // else {
-    //     *previous_ip = *instruction_pointer & ~0xFFF | start_of_instruction;
-    //     leave_on_stack(safe_read*_slow_jit(address));
-    //     if(page_fault) { trigger_pagefault_end_jit(); return; }
-    // }
-    ctx.builder.else_();
-
-    if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
-        ctx.builder.get_local(&address_local);
-        ctx.builder.get_local(&entry_local);
-        gen_call_fn2(ctx.builder, "report_safe_read_jit_slow");
-    }
-
-    ctx.builder.get_local(&address_local);
-    match bits {
-        BitSize::BYTE => {
-            gen_call_fn1_ret(ctx.builder, "safe_read8_slow_jit");
-        },
-        BitSize::WORD => {
-            gen_call_fn1_ret(ctx.builder, "safe_read16_slow_jit");
-        },
-        BitSize::DWORD => {
-            gen_call_fn1_ret(ctx.builder, "safe_read32s_slow_jit");
-        },
-        BitSize::QWORD => {
-            gen_call_fn1_ret_i64(ctx.builder, "safe_read64s_slow_jit");
-        },
-        BitSize::DQWORD => {
-            ctx.builder.const_i32(where_to_write.unwrap() as i32);
-            gen_call_fn2(ctx.builder, "safe_read128s_slow_jit");
-        },
-    }
-
-    ctx.builder.load_u8(global_pointers::PAGE_FAULT);
-
-    ctx.builder.if_void();
-    gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
-
-    gen_set_previous_eip_offset_from_eip_with_low_bits(
-        ctx.builder,
-        ctx.start_of_current_instruction as i32 & 0xFFF,
-    );
-
-    // -2 for the exit-with-pagefault block, +2 for leaving the two nested ifs from this function
-    let br_offset = ctx.current_brtable_depth - 2 + 2;
-    ctx.builder.br(br_offset);
-    ctx.builder.block_end();
-
-    ctx.builder.block_end();
-
     ctx.builder.free_local(entry_local);
 }
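
As a reading aid, the fast/slow structure that gen_safe_read now emits can be modelled in plain Rust roughly as follows (a sketch with placeholder flag values and slice-based memory, not the emulator's real types or the generated wasm):

    use std::convert::TryInto;

    const TLB_VALID: u32 = 1;     // placeholder flag value
    const FLAG_MASK: u32 = 0xFFF; // placeholder for the readonly/global/has-code/user mask

    fn jit_read32(tlb_data: &[u32], mem: &[u8], addr: u32) -> Option<u32> {
        let entry = tlb_data[(addr >> 12) as usize];
        let fits_in_page = (addr & 0xFFF) <= 0x1000 - 4;
        if entry & FLAG_MASK == TLB_VALID && fits_in_page {
            // fast path: the entry's upper bits XORed with the address give the
            // offset into guest memory (mem8 in the real code)
            let phys = ((entry & !0xFFF) ^ addr) as usize;
            Some(u32::from_le_bytes(mem[phys..phys + 4].try_into().unwrap()))
        } else {
            // slow path: the generated code instead calls safe_read32s_slow_jit(addr, eip),
            // branches to the exit-with-pagefault block if page_fault is set, and
            // otherwise reuses the returned value in place of the TLB entry
            None
        }
    }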
 
@@ -634,25 +604,27 @@ fn gen_safe_write(
     address_local: &WasmLocal,
     value_local: GenSafeWriteValue,
 ) {
-    // Generates safe_writeXX' fast-path inline, bailing to safe_writeXX_slow if necessary.
+    // Execute a virtual memory write. All slow paths (memory-mapped IO, tlb miss, page fault,
 +    // write across page boundary and page containing jitted code) are handled in safe_write_jit_slow
+
+    //   entry <- tlb_data[addr >> 12 << 2]
+    //   if entry & MASK == TLB_VALID && (addr & 0xFFF) <= 0x1000 - bytes: goto fast
+    //   entry <- safe_write_jit_slow(addr, value, instruction_pointer)
+    //   if page_fault: goto exit-with-pagefault
+    //   fast: mem[(entry & ~0xFFF) ^ addr] <- value
 
+    ctx.builder.block_void();
     ctx.builder.get_local(&address_local);
 
-    // Pseudo: base_on_stack = (uint32_t)address >> 12;
     ctx.builder.const_i32(12);
     ctx.builder.shr_u_i32();
-
-    // scale index
     ctx.builder.const_i32(2);
     ctx.builder.shl_i32();
 
-    // Pseudo: entry = tlb_data[base_on_stack];
     ctx.builder
         .load_aligned_i32_from_stack(global_pointers::TLB_DATA);
     let entry_local = ctx.builder.tee_new_local();
 
-    // Pseudo: bool can_use_fast_path = (entry & 0xFFF & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER) == TLB_VALID &&
-    //                                   (address & 0xFFF) <= (0x1000 - bitsize / 8));
     ctx.builder
         .const_i32((0xFFF & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32);
     ctx.builder.and_i32();
@@ -670,13 +642,59 @@ fn gen_safe_write(
         ctx.builder.and_i32();
     }
 
-    // Pseudo:
-    // if(can_use_fast_path)
-    // {
-    //     phys_addr = entry & ~0xFFF ^ address;
-    ctx.builder.if_void();
+    ctx.builder.br_if(0);
+
+    if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
+        ctx.builder.get_local(&address_local);
+        ctx.builder.get_local(&entry_local);
+        gen_call_fn2(ctx.builder, "report_safe_write_jit_slow");
+    }
+
+    ctx.builder.get_local(&address_local);
+    match value_local {
+        GenSafeWriteValue::I32(local) => ctx.builder.get_local(local),
+        GenSafeWriteValue::I64(local) => ctx.builder.get_local_i64(local),
+        GenSafeWriteValue::TwoI64s(local1, local2) => {
+            ctx.builder.get_local_i64(local1);
+            ctx.builder.get_local_i64(local2)
+        },
+    }
+    ctx.builder
+        .const_i32(ctx.start_of_current_instruction as i32);
+    match bits {
+        BitSize::BYTE => {
+            gen_call_fn3_ret(ctx.builder, "safe_write8_slow_jit");
+        },
+        BitSize::WORD => {
+            gen_call_fn3_ret(ctx.builder, "safe_write16_slow_jit");
+        },
+        BitSize::DWORD => {
+            gen_call_fn3_ret(ctx.builder, "safe_write32_slow_jit");
+        },
+        BitSize::QWORD => {
+            gen_call_fn3_i32_i64_i32_ret(ctx.builder, "safe_write64_slow_jit");
+        },
+        BitSize::DQWORD => {
+            gen_call_fn4_i32_i64_i64_i32_ret(ctx.builder, "safe_write128_slow_jit");
+        },
+    }
+    ctx.builder.set_local(&entry_local);
+
+    if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
+        ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+        ctx.builder.if_void();
+        gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
+        ctx.builder.block_end();
+    }
 
-    gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_WRITE_FAST);
+    // -2 for the exit-with-pagefault block, +1 for leaving the nested block from this function
+    let br_offset = ctx.current_brtable_depth - 2 + 1;
+    ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+    ctx.builder.br_if(br_offset);
+
+    ctx.builder.block_end();
+
+    gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_WRITE_FAST); // XXX: Both fast and slow
 
     ctx.builder.get_local(&entry_local);
     ctx.builder.const_i32(!0xFFF);
@@ -684,9 +702,8 @@ fn gen_safe_write(
     ctx.builder.get_local(&address_local);
     ctx.builder.xor_i32();
 
-    // Pseudo:
-    //     /* continued within can_use_fast_path branch */
-    //     mem8[phys_addr] = value;
+    ctx.builder.const_i32(unsafe { mem8 } as i32);
+    ctx.builder.add_i32();
 
     match value_local {
         GenSafeWriteValue::I32(local) => ctx.builder.get_local(local),
@@ -696,89 +713,233 @@ fn gen_safe_write(
 
             let virt_address_local = ctx.builder.tee_new_local();
             ctx.builder.get_local_i64(local1);
-            ctx.builder.store_unaligned_i64(unsafe { mem8 } as u32);
+            ctx.builder.store_unaligned_i64(0);
 
             ctx.builder.get_local(&virt_address_local);
             ctx.builder.get_local_i64(local2);
-            ctx.builder.store_unaligned_i64(unsafe { mem8 } as u32 + 8);
+            ctx.builder.store_unaligned_i64(8);
             ctx.builder.free_local(virt_address_local);
         },
     }
     match bits {
         BitSize::BYTE => {
-            ctx.builder.store_u8(unsafe { mem8 } as u32);
+            ctx.builder.store_u8(0);
         },
         BitSize::WORD => {
-            ctx.builder.store_unaligned_u16(unsafe { mem8 } as u32);
+            ctx.builder.store_unaligned_u16(0);
         },
         BitSize::DWORD => {
-            ctx.builder.store_unaligned_i32(unsafe { mem8 } as u32);
+            ctx.builder.store_unaligned_i32(0);
         },
         BitSize::QWORD => {
-            ctx.builder.store_unaligned_i64(unsafe { mem8 } as u32);
+            ctx.builder.store_unaligned_i64(0);
         },
         BitSize::DQWORD => {}, // handled above
     }
 
-    // Pseudo:
-    // else {
-    //     *previous_ip = *instruction_pointer & ~0xFFF | start_of_instruction;
-    //     safe_write*_slow_jit(address, value);
-    //     if(page_fault) { trigger_pagefault_end_jit(); return; }
-    // }
-    ctx.builder.else_();
+    ctx.builder.free_local(entry_local);
+}
+
+pub fn gen_safe_read_write(
+    ctx: &mut JitContext,
+    bits: BitSize,
+    address_local: &WasmLocal,
+    f: &dyn Fn(&mut JitContext),
+) {
+    // Execute a virtual memory read+write. All slow paths (memory-mapped IO, tlb miss, page fault,
 +    // write across page boundary and page containing jitted code) are handled in
+    // safe_read_write_jit_slow
+
+    //   entry <- tlb_data[addr >> 12 << 2]
+    //   can_use_fast_path <- entry & MASK == TLB_VALID && (addr & 0xFFF) <= 0x1000 - bytes
+    //   if can_use_fast_path: goto fast
+    //   entry <- safe_read_write_jit_slow(addr, instruction_pointer)
+    //   if page_fault: goto exit-with-pagefault
+    //   fast: value <- f(mem[(entry & ~0xFFF) ^ addr])
+    //   if !can_use_fast_path { safe_write_jit_slow(addr, value, instruction_pointer) }
+    //   mem[(entry & ~0xFFF) ^ addr] <- value
+
+    ctx.builder.block_void();
+    ctx.builder.get_local(address_local);
+
+    ctx.builder.const_i32(12);
+    ctx.builder.shr_u_i32();
+    ctx.builder.const_i32(2);
+    ctx.builder.shl_i32();
+
+    ctx.builder
+        .load_aligned_i32_from_stack(global_pointers::TLB_DATA);
+    let entry_local = ctx.builder.tee_new_local();
+
+    ctx.builder
+        .const_i32((0xFFF & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32);
+    ctx.builder.and_i32();
+
+    ctx.builder.const_i32(TLB_VALID as i32);
+    ctx.builder.eq_i32();
+
+    if bits != BitSize::BYTE {
+        ctx.builder.get_local(&address_local);
+        ctx.builder.const_i32(0xFFF);
+        ctx.builder.and_i32();
+        ctx.builder.const_i32(0x1000 - bits.bytes() as i32);
+        ctx.builder.le_i32();
+        ctx.builder.and_i32();
+    }
+
+    let can_use_fast_path_local = ctx.builder.tee_new_local();
+
+    ctx.builder.br_if(0);
 
     if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
         ctx.builder.get_local(&address_local);
         ctx.builder.get_local(&entry_local);
-        gen_call_fn2(ctx.builder, "report_safe_write_jit_slow");
+        gen_call_fn2(ctx.builder, "report_safe_read_write_jit_slow");
     }
 
     ctx.builder.get_local(&address_local);
-    match value_local {
-        GenSafeWriteValue::I32(local) => ctx.builder.get_local(local),
-        GenSafeWriteValue::I64(local) => ctx.builder.get_local_i64(local),
-        GenSafeWriteValue::TwoI64s(local1, local2) => {
-            ctx.builder.get_local_i64(local1);
-            ctx.builder.get_local_i64(local2)
-        },
-    }
+    ctx.builder
+        .const_i32(ctx.start_of_current_instruction as i32);
+
     match bits {
         BitSize::BYTE => {
-            gen_call_fn2(ctx.builder, "safe_write8_slow_jit");
+            gen_call_fn2_ret(ctx.builder, "safe_read_write8_slow_jit");
         },
         BitSize::WORD => {
-            gen_call_fn2(ctx.builder, "safe_write16_slow_jit");
+            gen_call_fn2_ret(ctx.builder, "safe_read_write16_slow_jit");
         },
         BitSize::DWORD => {
-            gen_call_fn2(ctx.builder, "safe_write32_slow_jit");
+            gen_call_fn2_ret(ctx.builder, "safe_read_write32s_slow_jit");
         },
-        BitSize::QWORD => {
-            gen_call_fn2_i32_i64(ctx.builder, "safe_write64_slow_jit");
+        BitSize::QWORD => dbg_assert!(false),
+        BitSize::DQWORD => dbg_assert!(false),
+    }
+    ctx.builder.set_local(&entry_local);
+
+    if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
+        ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+        ctx.builder.if_void();
+        gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
+        ctx.builder.block_end();
+    }
+
+    // -2 for the exit-with-pagefault block, +1 for leaving the nested block from this function
+    ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+    ctx.builder.br_if(ctx.current_brtable_depth - 2 + 1);
+
+    ctx.builder.block_end();
+
+    gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_WRITE_FAST); // XXX: Also slow
+
+    ctx.builder.get_local(&entry_local);
+    ctx.builder.const_i32(!0xFFF);
+    ctx.builder.and_i32();
+    ctx.builder.get_local(&address_local);
+    ctx.builder.xor_i32();
+
+    ctx.builder.const_i32(unsafe { mem8 } as i32);
+    ctx.builder.add_i32();
+
+    ctx.builder.free_local(entry_local);
+    let phys_addr_local = ctx.builder.tee_new_local();
+
+    match bits {
+        BitSize::BYTE => {
+            ctx.builder.load_u8_from_stack(0);
         },
-        BitSize::DQWORD => {
-            gen_call_fn3_i32_i64_i64(ctx.builder, "safe_write128_slow_jit");
+        BitSize::WORD => {
+            ctx.builder.load_unaligned_u16_from_stack(0);
         },
+        BitSize::DWORD => {
+            ctx.builder.load_unaligned_i32_from_stack(0);
+        },
+        BitSize::QWORD => assert!(false),  // not used
+        BitSize::DQWORD => assert!(false), // not used
     }
 
-    ctx.builder.load_u8(global_pointers::PAGE_FAULT);
+    // value is now on stack
+
+    f(ctx);
+    let value_local = ctx.builder.set_new_local(); // TODO: Could get rid of this local by returning one from f
 
+    ctx.builder.get_local(&can_use_fast_path_local);
+
+    ctx.builder.eqz_i32();
     ctx.builder.if_void();
-    gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
+    {
+        ctx.builder.get_local(&address_local);
+        ctx.builder.get_local(&value_local);
 
-    gen_set_previous_eip_offset_from_eip_with_low_bits(
-        ctx.builder,
-        ctx.start_of_current_instruction as i32 & 0xFFF,
-    );
+        ctx.builder
+            .const_i32(ctx.start_of_current_instruction as i32);
 
-    // -2 for the exit-with-pagefault block, +2 for leaving the two nested ifs from this function
-    let br_offset = ctx.current_brtable_depth - 2 + 2;
-    ctx.builder.br(br_offset);
-    ctx.builder.block_end();
+        match bits {
+            BitSize::BYTE => {
+                gen_call_fn3_ret(ctx.builder, "safe_write8_slow_jit");
+            },
+            BitSize::WORD => {
+                gen_call_fn3_ret(ctx.builder, "safe_write16_slow_jit");
+            },
+            BitSize::DWORD => {
+                gen_call_fn3_ret(ctx.builder, "safe_write32_slow_jit");
+            },
+            BitSize::QWORD => dbg_assert!(false),
+            BitSize::DQWORD => dbg_assert!(false),
+        }
+
+        ctx.builder.drop_();
+        ctx.builder.load_u8(global_pointers::PAGE_FAULT);
 
+        ctx.builder.if_void();
+        {
+            // handled above
+            if cfg!(debug_assertions) {
+                ctx.builder.const_i32(match bits {
+                    BitSize::BYTE => 8,
+                    BitSize::WORD => 16,
+                    BitSize::DWORD => 32,
+                    _ => {
+                        dbg_assert!(false);
+                        0
+                    },
+                });
+                ctx.builder.get_local(&address_local);
+                gen_call_fn2(ctx.builder, "bug_gen_safe_read_write_page_fault");
+            }
+            else {
+                ctx.builder.unreachable();
+            }
+        }
+        ctx.builder.block_end();
+    }
     ctx.builder.block_end();
 
-    ctx.builder.free_local(entry_local);
+    ctx.builder.get_local(&phys_addr_local);
+    ctx.builder.get_local(&value_local);
+
+    match bits {
+        BitSize::BYTE => {
+            ctx.builder.store_u8(0);
+        },
+        BitSize::WORD => {
+            ctx.builder.store_unaligned_u16(0);
+        },
+        BitSize::DWORD => {
+            ctx.builder.store_unaligned_i32(0);
+        },
+        BitSize::QWORD => dbg_assert!(false),
+        BitSize::DQWORD => dbg_assert!(false),
+    }
+
+    ctx.builder.free_local(value_local);
+    ctx.builder.free_local(can_use_fast_path_local);
+    ctx.builder.free_local(phys_addr_local);
+}
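
The control flow of gen_safe_read_write above can be modelled in plain Rust roughly as follows (closures stand in for the TLB lookup and the slow-path helpers; a sketch, not the real codegen):

    use std::convert::TryInto;

    fn rmw32(
        mem: &mut [u8],
        addr: u32,
        eip: u32,
        tlb_lookup: impl Fn(u32) -> Option<u32>,               // usable TLB entry, if any
        read_write_slow: impl Fn(u32, u32) -> Result<u32, ()>, // models safe_read_write32s_slow_jit
        write_slow: impl Fn(u32, u32, u32),                    // models safe_write32_slow_jit
        f: impl Fn(u32) -> u32,
    ) -> Result<(), ()> {
        let (fast, entry) = match tlb_lookup(addr) {
            Some(e) => (true, e),
            None => (false, read_write_slow(addr, eip)?), // Err => exit-with-pagefault
        };
        let phys = ((entry & !0xFFF) ^ addr) as usize;
        let value = f(u32::from_le_bytes(mem[phys..phys + 4].try_into().unwrap()));
        if !fast {
            // the slow helper performs the real (possibly cross-page or MMIO) store;
            // the unconditional store below is then redundant or lands in the scratch page
            write_slow(addr, value, eip);
        }
        mem[phys..phys + 4].copy_from_slice(&value.to_le_bytes());
        Ok(())
    }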
+
+#[no_mangle]
+pub fn bug_gen_safe_read_write_page_fault(bits: i32, addr: u32) {
+    dbg_log!("bug: gen_safe_read_write_page_fault {} {:x}", bits, addr);
+    dbg_assert!(false);
 }
 
 pub fn gen_jmp_rel16(builder: &mut WasmBuilder, rel16: u16) {
@@ -1140,201 +1301,6 @@ pub fn gen_get_real_eip(ctx: &mut JitContext) {
     ctx.builder.sub_i32();
 }
 
-pub fn gen_safe_read_write(
-    ctx: &mut JitContext,
-    bits: BitSize,
-    address_local: &WasmLocal,
-    f: &dyn Fn(&mut JitContext),
-) {
-    ctx.builder.get_local(address_local);
-
-    // Pseudo: base_on_stack = (uint32_t)address >> 12;
-    ctx.builder.const_i32(12);
-    ctx.builder.shr_u_i32();
-
-    // scale index
-    ctx.builder.const_i32(2);
-    ctx.builder.shl_i32();
-
-    // Pseudo: entry = tlb_data[base_on_stack];
-    ctx.builder
-        .load_aligned_i32_from_stack(global_pointers::TLB_DATA);
-    let entry_local = ctx.builder.tee_new_local();
-
-    // Pseudo: bool can_use_fast_path = (entry & 0xFFF & ~TLB_READONLY & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER) == TLB_VALID &&
-    //                                   (address & 0xFFF) <= (0x1000 - (bitsize / 8));
-    ctx.builder
-        .const_i32((0xFFF & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32);
-    ctx.builder.and_i32();
-
-    ctx.builder.const_i32(TLB_VALID as i32);
-    ctx.builder.eq_i32();
-
-    if bits != BitSize::BYTE {
-        ctx.builder.get_local(&address_local);
-        ctx.builder.const_i32(0xFFF);
-        ctx.builder.and_i32();
-        ctx.builder.const_i32(0x1000 - bits.bytes() as i32);
-        ctx.builder.le_i32();
-        ctx.builder.and_i32();
-    }
-
-    let can_use_fast_path_local = ctx.builder.tee_new_local();
-
-    ctx.builder.if_i32();
-
-    gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_WRITE_FAST);
-
-    ctx.builder.get_local(&entry_local);
-    ctx.builder.const_i32(!0xFFF);
-    ctx.builder.and_i32();
-    ctx.builder.get_local(&address_local);
-    ctx.builder.xor_i32();
-
-    let phys_addr_local = ctx.builder.tee_new_local();
-
-    match bits {
-        BitSize::BYTE => {
-            ctx.builder.load_u8_from_stack(unsafe { mem8 } as u32);
-        },
-        BitSize::WORD => {
-            ctx.builder
-                .load_unaligned_u16_from_stack(unsafe { mem8 } as u32);
-        },
-        BitSize::DWORD => {
-            ctx.builder
-                .load_unaligned_i32_from_stack(unsafe { mem8 } as u32);
-        },
-        BitSize::QWORD => assert!(false),  // not used
-        BitSize::DQWORD => assert!(false), // not used
-    }
-
-    ctx.builder.else_();
-    {
-        if cfg!(feature = "profiler") && cfg!(feature = "profiler_instrument") {
-            ctx.builder.get_local(&address_local);
-            ctx.builder.get_local(&entry_local);
-            gen_call_fn2(ctx.builder, "report_safe_read_write_jit_slow");
-        }
-
-        ctx.builder.get_local(&address_local);
-
-        match bits {
-            BitSize::BYTE => {
-                gen_call_fn1_ret(ctx.builder, "safe_read_write8_slow_jit");
-            },
-            BitSize::WORD => {
-                gen_call_fn1_ret(ctx.builder, "safe_read_write16_slow_jit");
-            },
-            BitSize::DWORD => {
-                gen_call_fn1_ret(ctx.builder, "safe_read_write32s_slow_jit");
-            },
-            BitSize::QWORD => dbg_assert!(false),
-            BitSize::DQWORD => dbg_assert!(false),
-        }
-
-        ctx.builder.load_u8(global_pointers::PAGE_FAULT);
-
-        ctx.builder.if_void();
-        {
-            gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction);
-
-            gen_set_previous_eip_offset_from_eip_with_low_bits(
-                ctx.builder,
-                ctx.start_of_current_instruction as i32 & 0xFFF,
-            );
-
-            // -2 for the exit-with-pagefault block, +2 for leaving the two nested ifs from this function
-            let br_offset = ctx.current_brtable_depth - 2 + 2;
-            ctx.builder.br(br_offset);
-        }
-        ctx.builder.block_end();
-    }
-    ctx.builder.block_end();
-
-    // value is now on stack
-
-    f(ctx);
-    let value_local = ctx.builder.set_new_local();
-
-    ctx.builder.get_local(&can_use_fast_path_local);
-
-    ctx.builder.if_void();
-    {
-        ctx.builder.get_local(&phys_addr_local);
-        ctx.builder.get_local(&value_local);
-
-        match bits {
-            BitSize::BYTE => {
-                ctx.builder.store_u8(unsafe { mem8 } as u32);
-            },
-            BitSize::WORD => {
-                ctx.builder.store_unaligned_u16(unsafe { mem8 } as u32);
-            },
-            BitSize::DWORD => {
-                ctx.builder.store_unaligned_i32(unsafe { mem8 } as u32);
-            },
-            BitSize::QWORD => dbg_assert!(false),
-            BitSize::DQWORD => dbg_assert!(false),
-        }
-    }
-    ctx.builder.else_();
-    {
-        ctx.builder.get_local(&address_local);
-        ctx.builder.get_local(&value_local);
-
-        match bits {
-            BitSize::BYTE => {
-                gen_call_fn2(ctx.builder, "safe_write8_slow_jit");
-            },
-            BitSize::WORD => {
-                gen_call_fn2(ctx.builder, "safe_write16_slow_jit");
-            },
-            BitSize::DWORD => {
-                gen_call_fn2(ctx.builder, "safe_write32_slow_jit");
-            },
-            BitSize::QWORD => dbg_assert!(false),
-            BitSize::DQWORD => dbg_assert!(false),
-        }
-
-        ctx.builder.load_u8(global_pointers::PAGE_FAULT);
-
-        ctx.builder.if_void();
-        {
-            // handled above
-            if cfg!(debug_assertions) {
-                ctx.builder.const_i32(match bits {
-                    BitSize::BYTE => 8,
-                    BitSize::WORD => 16,
-                    BitSize::DWORD => 32,
-                    _ => {
-                        dbg_assert!(false);
-                        0
-                    },
-                });
-                ctx.builder.get_local(&address_local);
-                gen_call_fn2(ctx.builder, "bug_gen_safe_read_write_page_fault");
-            }
-            else {
-                ctx.builder.unreachable();
-            }
-        }
-        ctx.builder.block_end();
-    }
-    ctx.builder.block_end();
-
-    ctx.builder.free_local(value_local);
-    ctx.builder.free_local(can_use_fast_path_local);
-    ctx.builder.free_local(phys_addr_local);
-    ctx.builder.free_local(entry_local);
-}
-
-#[no_mangle]
-pub fn bug_gen_safe_read_write_page_fault(bits: i32, addr: u32) {
-    dbg_log!("bug: gen_safe_read_write_page_fault {} {:x}", bits, addr);
-    dbg_assert!(false);
-}
-
 pub fn gen_set_last_op1(builder: &mut WasmBuilder, source: &WasmLocal) {
     builder.const_i32(global_pointers::LAST_OP1 as i32);
     builder.get_local(&source);

+ 181 - 258
src/rust/cpu2/cpu.rs

@@ -2142,9 +2142,7 @@ pub unsafe fn virt_boundary_write32(low: u32, high: u32, value: i32) {
 
 pub unsafe fn safe_read8(addr: i32) -> OrPageFault<i32> { Ok(read8(translate_address_read(addr)?)) }
 
-pub unsafe fn safe_read16(address: i32) -> OrPageFault<i32> { Ok(safe_read16_slow(address)?) }
-
-pub unsafe fn safe_read16_slow(addr: i32) -> OrPageFault<i32> {
+pub unsafe fn safe_read16(addr: i32) -> OrPageFault<i32> {
     if addr & 0xFFF == 0xFFF {
         Ok(safe_read8(addr)? | safe_read8(addr + 1)? << 8)
     }
@@ -2235,134 +2233,214 @@ pub unsafe fn safe_read32s(addr: i32) -> OrPageFault<i32> {
     }
 }
 
-#[no_mangle]
-pub unsafe fn safe_read8_slow_jit(addr: i32) -> i32 {
-    match safe_read8_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v
-        },
+#[repr(align(0x1000))]
+struct ScratchBuffer([u8; 0x1000 * 2]);
+static mut jit_paging_scratch_buffer: ScratchBuffer = ScratchBuffer([0; 2 * 0x1000]);
+
+pub unsafe fn safe_read_slow_jit(addr: i32, bitsize: i32, start_eip: i32, is_write: bool) -> i32 {
+    let crosses_page = (addr & 0xFFF) + bitsize / 8 > 0x1000;
+    let addr_low = match if is_write {
+        translate_address_write_jit(addr)
+    }
+    else {
+        translate_address_read_jit(addr)
+    } {
         Err(()) => {
+            *previous_ip = *instruction_pointer & !0xFFF | start_eip & 0xFFF;
             *page_fault = true;
-            -1
+            return 0; // TODO: Return value so that jit code fails when accidentally accessing this
         },
+        Ok(addr) => addr,
+    };
+    if crosses_page {
+        let boundary_addr = (addr | 0xFFF) + 1;
+        let addr_high = match if is_write {
+            translate_address_write_jit(boundary_addr)
+        }
+        else {
+            translate_address_read_jit(boundary_addr)
+        } {
+            Err(()) => {
+                *previous_ip = *instruction_pointer & !0xFFF | start_eip & 0xFFF;
+                *page_fault = true;
+                return 0; // TODO: Return value so that jit code fails when accidentally accessing this
+            },
+            Ok(addr) => addr,
+        };
+        *page_fault = false;
+        // TODO: Could check if virtual pages point to consecutive physical and go to fast path
+        // do read, write into scratch buffer
+
+        let scratch = jit_paging_scratch_buffer.0.as_mut_ptr() as u32;
+        dbg_assert!(scratch & 0xFFF == 0);
+
+        for s in addr_low..((addr_low | 0xFFF) + 1) {
+            *(scratch as *mut u8).offset((s & 0xFFF) as isize) = read8(s) as u8
+        }
+        for s in addr_high..(addr_high + (addr + bitsize / 8 & 0xFFF) as u32) {
+            *(scratch as *mut u8).offset((0x1000 | s & 0xFFF) as isize) = read8(s) as u8
+        }
+        ((scratch - mem8 as u32) as i32) ^ addr
+    }
+    else if in_mapped_range(addr_low) {
+        *page_fault = false;
+        let scratch = jit_paging_scratch_buffer.0.as_mut_ptr() as u32;
+        dbg_assert!(scratch & 0xFFF == 0);
+        for s in addr_low..(addr_low + bitsize as u32 / 8) {
+            *(scratch as *mut u8).offset((s & 0xFFF) as isize) = read8(s) as u8
+        }
+        ((scratch - mem8 as u32) as i32) ^ addr
+    }
+    else {
+        *page_fault = false;
+        addr_low as i32 ^ addr
     }
 }
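
The cross-page branch above relies on a page-aligned scratch buffer; a standalone sketch of that staging step (toy types, with read_phys standing in for read8):

    fn stage_cross_page_read(
        scratch: &mut [u8; 0x2000],    // page-aligned, like jit_paging_scratch_buffer
        read_phys: impl Fn(u32) -> u8, // stands in for read8
        addr: u32,                     // virtual address of the access
        addr_low: u32,                 // physical address of the first part
        addr_high: u32,                // physical address of the second page
        bytes: u32,
    ) {
        // first page: copy from addr_low up to the end of its 4K page
        for s in addr_low..((addr_low | 0xFFF) + 1) {
            scratch[(s & 0xFFF) as usize] = read_phys(s);
        }
        // second page: copy the bytes of the access that spill over the boundary
        for s in addr_high..(addr_high + ((addr + bytes) & 0xFFF)) {
            scratch[(0x1000 | (s & 0xFFF)) as usize] = read_phys(s);
        }
        // the caller then returns an offset so that the JIT's single unaligned
        // load reads from this buffer instead of guest memory
    }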
 
 #[no_mangle]
-pub unsafe fn safe_read_write8_slow_jit(addr: i32) -> i32 {
-    match safe_read_write8_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v
-        },
-        Err(()) => {
-            *page_fault = true;
-            -1
-        },
-    }
+pub unsafe fn safe_read8_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 8, eip, false)
 }
-
-unsafe fn safe_read8_slow_jit2(addr: i32) -> OrPageFault<i32> {
-    Ok(read8(translate_address_read_jit(addr)?))
+#[no_mangle]
+pub unsafe fn safe_read16_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 16, eip, false)
 }
-
-unsafe fn safe_read_write8_slow_jit2(addr: i32) -> OrPageFault<i32> {
-    Ok(read8(translate_address_write_jit(addr)?))
+#[no_mangle]
+pub unsafe fn safe_read32s_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 32, eip, false)
+}
+#[no_mangle]
+pub unsafe fn safe_read64s_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 64, eip, false)
+}
+#[no_mangle]
+pub unsafe fn safe_read128s_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 128, eip, false)
 }
 
-unsafe fn safe_read16_slow_jit2(addr: i32) -> OrPageFault<i32> {
-    if addr & 0xFFF == 0xFFF {
-        return Ok(safe_read8_slow_jit2(addr)? | safe_read8_slow_jit2(addr + 1)? << 8);
-    }
-    else {
-        return Ok(read16(translate_address_read_jit(addr)?));
-    };
+#[no_mangle]
+pub unsafe fn safe_read_write8_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 8, eip, true)
+}
+#[no_mangle]
+pub unsafe fn safe_read_write16_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 16, eip, true)
+}
+#[no_mangle]
+pub unsafe fn safe_read_write32s_slow_jit(addr: i32, eip: i32) -> i32 {
+    safe_read_slow_jit(addr, 32, eip, true)
 }
 
-unsafe fn safe_read_write16_slow_jit2(addr: i32) -> OrPageFault<i32> {
-    if addr & 0xFFF == 0xFFF {
-        return Ok(safe_read_write8_slow_jit2(addr)? | safe_read_write8_slow_jit2(addr + 1)? << 8);
-    }
-    else {
-        return Ok(read16(translate_address_write_jit(addr)?));
+pub unsafe fn safe_write_slow_jit(
+    addr: i32,
+    bitsize: i32,
+    value_low: u64,
+    value_high: u64,
+    start_eip: i32,
+) -> i32 {
+    let crosses_page = (addr & 0xFFF) + bitsize / 8 > 0x1000;
+    let addr_low = match translate_address_write_jit(addr) {
+        Err(()) => {
+            *previous_ip = *instruction_pointer & !0xFFF | start_eip & 0xFFF;
+            *page_fault = true;
+            return 0; // TODO: Return value so that jit code fails when accidentally accessing this
+        },
+        Ok(addr) => addr,
     };
-}
+    if crosses_page {
+        let addr_high = match translate_address_write_jit((addr | 0xFFF) + 1) {
+            Err(()) => {
+                *previous_ip = *instruction_pointer & !0xFFF | start_eip & 0xFFF;
+                *page_fault = true;
+                return 0; // TODO: Return value so that jit code fails when accidentally accessing this
+            },
+            Ok(addr) => addr,
+        };
+        *page_fault = false;
+        // TODO: Could check if virtual pages point to consecutive physical and go to fast path
 
-unsafe fn safe_read32s_slow_jit2(addr: i32) -> OrPageFault<i32> {
-    if addr & 0xFFF >= 0xFFD {
-        return Ok(safe_read16_slow_jit2(addr)? | safe_read16_slow_jit2(addr + 2)? << 16);
+        match bitsize {
+            128 => safe_write128(
+                addr,
+                reg128 {
+                    u64_0: [value_low, value_high],
+                },
+            )
+            .unwrap(),
+            64 => safe_write64(addr, value_low as i64).unwrap(),
+            32 => safe_write32(addr, value_low as i32).unwrap(),
+            16 => safe_write16(addr, value_low as i32).unwrap(),
+            8 => safe_write8(addr, value_low as i32).unwrap(),
+            _ => dbg_assert!(false),
+        }
+
+        // do write, return dummy pointer for fast path to write into
+
+        let scratch = jit_paging_scratch_buffer.0.as_mut_ptr() as u32;
+        dbg_assert!(scratch & 0xFFF == 0);
+        (scratch as i32 - mem8 as i32) ^ addr
     }
-    else {
-        return Ok(read32s(translate_address_read_jit(addr)?));
-    };
-}
+    else if in_mapped_range(addr_low) {
+        *page_fault = false;
 
-unsafe fn safe_read_write32s_slow_jit2(addr: i32) -> OrPageFault<i32> {
-    if addr & 0xFFF >= 0xFFD {
-        return Ok(
-            safe_read_write16_slow_jit2(addr)? | safe_read_write16_slow_jit2(addr + 2)? << 16
-        );
+        match bitsize {
+            128 => safe_write128(
+                addr,
+                reg128 {
+                    u64_0: [value_low, value_high],
+                },
+            )
+            .unwrap(),
+            64 => safe_write64(addr, value_low as i64).unwrap(),
+            32 => safe_write32(addr, value_low as i32).unwrap(),
+            16 => safe_write16(addr, value_low as i32).unwrap(),
+            8 => safe_write8(addr, value_low as i32).unwrap(),
+            _ => dbg_assert!(false),
+        }
+
+        let scratch = jit_paging_scratch_buffer.0.as_mut_ptr() as u32;
+        dbg_assert!(scratch & 0xFFF == 0);
+        (scratch as i32 - mem8 as i32) ^ addr
     }
     else {
-        return Ok(read32s(translate_address_write_jit(addr)?));
-    };
+        match bitsize {
+            128 => safe_write128(
+                addr,
+                reg128 {
+                    u64_0: [value_low, value_high],
+                },
+            )
+            .unwrap(),
+            64 => safe_write64(addr, value_low as i64).unwrap(),
+            32 => safe_write32(addr, value_low as i32).unwrap(),
+            16 => safe_write16(addr, value_low as i32).unwrap(),
+            8 => safe_write8(addr, value_low as i32).unwrap(),
+            _ => dbg_assert!(false),
+        }
+        *page_fault = false;
+        addr_low as i32 ^ addr
+    }
 }
 
 #[no_mangle]
-pub unsafe fn safe_read16_slow_jit(addr: i32) -> i32 {
-    match safe_read16_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v
-        },
-        Err(()) => {
-            *page_fault = true;
-            -1
-        },
-    }
+pub unsafe fn safe_write8_slow_jit(addr: i32, value: u32, start_eip: i32) -> i32 {
+    safe_write_slow_jit(addr, 8, value as u64, 0, start_eip)
 }
-
 #[no_mangle]
-pub unsafe fn safe_read_write16_slow_jit(addr: i32) -> i32 {
-    match safe_read_write16_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v
-        },
-        Err(()) => {
-            *page_fault = true;
-            -1
-        },
-    }
+pub unsafe fn safe_write16_slow_jit(addr: i32, value: u32, start_eip: i32) -> i32 {
+    safe_write_slow_jit(addr, 16, value as u64, 0, start_eip)
 }
-
 #[no_mangle]
-pub unsafe fn safe_read32s_slow_jit(addr: i32) -> i32 {
-    match safe_read32s_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v
-        },
-        Err(()) => {
-            *page_fault = true;
-            -1
-        },
-    }
+pub unsafe fn safe_write32_slow_jit(addr: i32, value: u32, start_eip: i32) -> i32 {
+    safe_write_slow_jit(addr, 32, value as u64, 0, start_eip)
 }
-
 #[no_mangle]
-pub unsafe fn safe_read_write32s_slow_jit(addr: i32) -> i32 {
-    match safe_read_write32s_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v
-        },
-        Err(()) => {
-            *page_fault = true;
-            -1
-        },
-    }
+pub unsafe fn safe_write64_slow_jit(addr: i32, value: u64, start_eip: i32) -> i32 {
+    safe_write_slow_jit(addr, 64, value, 0, start_eip)
+}
+#[no_mangle]
+pub unsafe fn safe_write128_slow_jit(addr: i32, low: u64, high: u64, start_eip: i32) -> i32 {
+    safe_write_slow_jit(addr, 128, low, high, start_eip)
 }
 
 pub unsafe fn safe_read64s(addr: i32) -> OrPageFault<reg64> {
@@ -2378,33 +2456,6 @@ pub unsafe fn safe_read64s(addr: i32) -> OrPageFault<reg64> {
     Ok(x)
 }
 
-pub unsafe fn safe_read64s_slow_jit2(addr: i32) -> OrPageFault<reg64> {
-    let mut x: reg64 = reg64 { i8_0: [0; 8] };
-    if addr & 0xFFF > 0x1000 - 8 {
-        x.u32_0[0] = safe_read32s_slow_jit2(addr)? as u32;
-        x.u32_0[1] = safe_read32s_slow_jit2(addr + 4)? as u32
-    }
-    else {
-        let addr_phys = translate_address_read_jit(addr)?;
-        x.u64_0[0] = read64s(addr_phys) as u64
-    }
-    Ok(x)
-}
-
-#[no_mangle]
-pub unsafe fn safe_read64s_slow_jit(addr: i32) -> i64 {
-    match safe_read64s_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            v.i64_0[0]
-        },
-        Err(()) => {
-            *page_fault = true;
-            0
-        },
-    }
-}
-
 pub unsafe fn safe_read128s(addr: i32) -> OrPageFault<reg128> {
     let mut x: reg128 = reg128 { i8_0: [0; 16] };
     if addr & 0xFFF > 0x1000 - 16 {
@@ -2418,32 +2469,6 @@ pub unsafe fn safe_read128s(addr: i32) -> OrPageFault<reg128> {
     Ok(x)
 }
 
-pub unsafe fn safe_read128s_slow_jit2(addr: i32) -> OrPageFault<reg128> {
-    let mut x: reg128 = reg128 { i8_0: [0; 16] };
-    if addr & 0xFFF > 0x1000 - 16 {
-        x.u64_0[0] = safe_read64s_slow_jit2(addr)?.u64_0[0];
-        x.u64_0[1] = safe_read64s_slow_jit2(addr + 8)?.u64_0[0]
-    }
-    else {
-        let addr_phys = translate_address_read_jit(addr)?;
-        x = read128(addr_phys)
-    }
-    Ok(x)
-}
-
-#[no_mangle]
-pub unsafe fn safe_read128s_slow_jit(addr: i32, where_to_write: u32) {
-    match safe_read128s_slow_jit2(addr) {
-        Ok(v) => {
-            *page_fault = false;
-            *(where_to_write as *mut reg128) = v;
-        },
-        Err(()) => {
-            *page_fault = true;
-        },
-    }
-}
-
 pub unsafe fn safe_write8(addr: i32, value: i32) -> OrPageFault<()> {
     write8(translate_address_write(addr)?, value);
     Ok(())
@@ -2475,61 +2500,6 @@ pub unsafe fn safe_write32(addr: i32, value: i32) -> OrPageFault<()> {
     Ok(())
 }
 
-pub unsafe fn safe_write8_slow_jit2(addr: i32, value: i32) -> OrPageFault<()> {
-    write8(translate_address_write_jit(addr)?, value);
-    Ok(())
-}
-
-#[no_mangle]
-pub unsafe fn safe_write8_slow_jit(addr: i32, value: i32) {
-    match safe_write8_slow_jit2(addr, value) {
-        Ok(()) => *page_fault = false,
-        Err(()) => *page_fault = true,
-    }
-}
-
-pub unsafe fn safe_write16_slow_jit2(addr: i32, value: i32) -> OrPageFault<()> {
-    let phys_low = translate_address_write_jit(addr)?;
-    if addr & 0xFFF == 0xFFF {
-        virt_boundary_write16(phys_low, translate_address_write_jit(addr + 1)?, value);
-    }
-    else {
-        write16(phys_low as u32, value);
-    };
-    Ok(())
-}
-
-#[no_mangle]
-pub unsafe fn safe_write16_slow_jit(addr: i32, value: i32) {
-    match safe_write16_slow_jit2(addr, value) {
-        Ok(()) => *page_fault = false,
-        Err(()) => *page_fault = true,
-    }
-}
-
-pub unsafe fn safe_write32_slow_jit2(addr: i32, value: i32) -> OrPageFault<()> {
-    let phys_low = translate_address_write_jit(addr)?;
-    if addr & 0xFFF > 0x1000 - 4 {
-        virt_boundary_write32(
-            phys_low,
-            translate_address_write_jit(addr + 3 & !3)? | (addr as u32 + 3 & 3),
-            value,
-        );
-    }
-    else {
-        write32(phys_low as u32, value);
-    };
-    Ok(())
-}
-
-#[no_mangle]
-pub unsafe fn safe_write32_slow_jit(addr: i32, value: i32) {
-    match safe_write32_slow_jit2(addr, value) {
-        Ok(()) => *page_fault = false,
-        Err(()) => *page_fault = true,
-    }
-}
-
 pub unsafe fn safe_write64(addr: i32, value: i64) -> OrPageFault<()> {
     if addr & 0xFFF > 0x1000 - 8 {
         writable_or_pagefault(addr, 8)?;
@@ -2543,27 +2513,6 @@ pub unsafe fn safe_write64(addr: i32, value: i64) -> OrPageFault<()> {
     Ok(())
 }
 
-pub unsafe fn safe_write64_slow_jit2(addr: i32, value: i64) -> OrPageFault<()> {
-    if addr & 0xFFF > 0x1000 - 8 {
-        writable_or_pagefault_jit(addr, 8)?;
-        safe_write32_slow_jit2(addr, value as i32).unwrap();
-        safe_write32_slow_jit2(addr + 4, (value >> 32) as i32).unwrap();
-    }
-    else {
-        let phys = translate_address_write_jit(addr)?;
-        write64(phys, value);
-    };
-    Ok(())
-}
-
-#[no_mangle]
-pub unsafe fn safe_write64_slow_jit(addr: i32, value: i64) {
-    match safe_write64_slow_jit2(addr, value) {
-        Ok(()) => *page_fault = false,
-        Err(()) => *page_fault = true,
-    }
-}
-
 pub unsafe fn safe_write128(addr: i32, value: reg128) -> OrPageFault<()> {
     if addr & 0xFFF > 0x1000 - 16 {
         writable_or_pagefault(addr, 16)?;
@@ -2577,32 +2526,6 @@ pub unsafe fn safe_write128(addr: i32, value: reg128) -> OrPageFault<()> {
     Ok(())
 }
 
-pub unsafe fn safe_write128_slow_jit2(addr: i32, value: reg128) -> OrPageFault<()> {
-    if addr & 0xFFF > 0x1000 - 16 {
-        writable_or_pagefault_jit(addr, 16)?;
-        safe_write64_slow_jit2(addr, value.u64_0[0] as i64).unwrap();
-        safe_write64_slow_jit2(addr + 8, value.u64_0[1] as i64).unwrap();
-    }
-    else {
-        let phys = translate_address_write_jit(addr)?;
-        write128(phys, value);
-    };
-    Ok(())
-}
-
-#[no_mangle]
-pub unsafe fn safe_write128_slow_jit(addr: i32, value_low: i64, value_high: i64) {
-    match safe_write128_slow_jit2(
-        addr,
-        reg128 {
-            i64_0: [value_low, value_high],
-        },
-    ) {
-        Ok(()) => *page_fault = false,
-        Err(()) => *page_fault = true,
-    }
-}
-
 pub fn get_reg8_index(index: i32) -> i32 { return index << 2 & 12 | index >> 2 & 1; }
 
 pub unsafe fn read_reg8(index: i32) -> i32 {

+ 180 - 187
src/rust/wasmgen/wasm_builder.rs

@@ -1,44 +1,41 @@
 use leb::{write_fixed_leb16_at_idx, write_fixed_leb32_at_idx, write_leb_i32, write_leb_u32};
+use std::mem::transmute;
 use util::{SafeToU8, SafeToU16};
 use wasmgen::wasm_opcodes as op;
 
-#[allow(dead_code)]
-pub const FN0_TYPE_INDEX: u8 = 0;
-#[allow(dead_code)]
-pub const FN1_TYPE_INDEX: u8 = 1;
-#[allow(dead_code)]
-pub const FN2_TYPE_INDEX: u8 = 2;
-#[allow(dead_code)]
-pub const FN3_TYPE_INDEX: u8 = 3;
-
-#[allow(dead_code)]
-pub const FN0_RET_TYPE_INDEX: u8 = 4;
-#[allow(dead_code)]
-pub const FN1_RET_TYPE_INDEX: u8 = 5;
-#[allow(dead_code)]
-pub const FN2_RET_TYPE_INDEX: u8 = 6;
-
-#[allow(dead_code)]
-pub const FN1_RET_F64_TYPE_INDEX: u8 = 7;
-#[allow(dead_code)]
-pub const FN2_I32_F64_TYPE_INDEX: u8 = 8;
-#[allow(dead_code)]
-pub const FN1_F64_TYPE_INDEX: u8 = 9;
-#[allow(dead_code)]
-pub const FN1_RET_I64_TYPE_INDEX: u8 = 10;
-#[allow(dead_code)]
-pub const FN2_I32_I64_TYPE_INDEX: u8 = 11;
-#[allow(dead_code)]
-pub const FN1_F64_RET_I32_TYPE_INDEX: u8 = 12;
-#[allow(dead_code)]
-pub const FN1_F64_RET_I64_TYPE_INDEX: u8 = 13;
-
-#[allow(dead_code)]
-pub const FN3_RET_TYPE_INDEX: u8 = 14;
-#[allow(dead_code)]
-pub const FN3_I32_I64_I64_TYPE_INDEX: u8 = 15;
-
-pub const NR_FN_TYPE_INDEXES: u8 = 16;
+#[derive(PartialEq)]
+#[allow(non_camel_case_types)]
+pub enum FunctionType {
+    FN0_TYPE_INDEX,
+    FN1_TYPE_INDEX,
+    FN2_TYPE_INDEX,
+    FN3_TYPE_INDEX,
+
+    FN0_RET_TYPE_INDEX,
+    FN1_RET_TYPE_INDEX,
+    FN2_RET_TYPE_INDEX,
+
+    FN1_RET_F64_TYPE_INDEX,
+    FN2_I32_F64_TYPE_INDEX,
+    FN1_F64_TYPE_INDEX,
+    FN1_F64_RET_I32_TYPE_INDEX,
+    FN1_F64_RET_I64_TYPE_INDEX,
+
+    FN3_RET_TYPE_INDEX,
+
+    FN3_I32_I64_I32_RET_TYPE_INDEX,
+    FN4_I32_I64_I64_I32_RET_TYPE_INDEX,
+    // When adding at the end, update LAST below
+}
+
+impl FunctionType {
+    pub fn of_u8(x: u8) -> FunctionType {
+        dbg_assert!(x <= FunctionType::LAST as u8);
+        unsafe { transmute(x) }
+    }
+    pub fn to_u8(self: FunctionType) -> u8 { self as u8 }
+    pub const LAST: FunctionType = FunctionType::FN4_I32_I64_I64_I32_RET_TYPE_INDEX;
+}
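
A hedged usage sketch for the enum above (assuming it is in scope): variants round-trip through their u8 index, which the type-section writer relies on when emitting one descriptor per index from 0 to LAST; LAST.to_u8() + 1 is the number of emitted type descriptors.

    fn main() {
        let idx = FunctionType::FN2_RET_TYPE_INDEX.to_u8();
        assert!(FunctionType::of_u8(idx) == FunctionType::FN2_RET_TYPE_INDEX);
    }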
 
 pub const WASM_MODULE_ARGUMENT_COUNT: u8 = 1;
 
@@ -205,119 +202,122 @@ impl WasmBuilder {
         let idx_section_size = self.output.len();
         self.output.push(0);
 
-        self.output.push(NR_FN_TYPE_INDEXES); // number of type descriptors
-
-        // FN0
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(0); // no args
-        self.output.push(0); // no return val
-
-        // FN1
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-        self.output.push(0);
-
-        // FN2
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(2);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I32);
-        self.output.push(0);
-
-        // FN3
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(3);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I32);
-        self.output.push(0);
-
-        // FN0_RET
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(0);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-
-        // FN1_RET
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-
-        // FN2_RET
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(2);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I32);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-
-        // FN1_RET_F64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-        self.output.push(1);
-        self.output.push(op::TYPE_F64);
-
-        // FN2_I32_F64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(2);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_F64);
-        self.output.push(0);
-
-        // FN1_F64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_F64);
-        self.output.push(0);
-
-        // FN1_RET_I64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-        self.output.push(1);
-        self.output.push(op::TYPE_I64);
-
-        // FN2_I32_I64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(2);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I64);
-        self.output.push(0);
-
-        // FN1_F64_RET_I32
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_F64);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-
-        // FN1_F64_RET_I64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(1);
-        self.output.push(op::TYPE_F64);
-        self.output.push(1);
-        self.output.push(op::TYPE_I64);
-
-        // FN3_RET
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(3);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I32);
-        self.output.push(1);
-        self.output.push(op::TYPE_I32);
-
-        // FN3_I32_I64_I64
-        self.output.push(op::TYPE_FUNC);
-        self.output.push(3);
-        self.output.push(op::TYPE_I32);
-        self.output.push(op::TYPE_I64);
-        self.output.push(op::TYPE_I64);
-        self.output.push(0);
+        let nr_of_function_types = FunctionType::to_u8(FunctionType::LAST) + 1;
+        self.output.push(nr_of_function_types);
+
+        for i in 0..(nr_of_function_types) {
+            match FunctionType::of_u8(i) {
+                FunctionType::FN0_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(0); // no args
+                    self.output.push(0); // no return val
+                },
+                FunctionType::FN1_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(0);
+                },
+                FunctionType::FN2_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(2);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(0);
+                },
+                FunctionType::FN3_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(3);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(0);
+                },
+                FunctionType::FN0_RET_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(0);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+                FunctionType::FN1_RET_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+                FunctionType::FN2_RET_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(2);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+                FunctionType::FN1_RET_F64_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_F64);
+                },
+                FunctionType::FN2_I32_F64_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(2);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_F64);
+                    self.output.push(0);
+                },
+                FunctionType::FN1_F64_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_F64);
+                    self.output.push(0);
+                },
+                FunctionType::FN1_F64_RET_I32_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_F64);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+                FunctionType::FN1_F64_RET_I64_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_F64);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I64);
+                },
+                FunctionType::FN3_RET_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(3);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+                FunctionType::FN3_I32_I64_I32_RET_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(3);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I64);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+                FunctionType::FN4_I32_I64_I64_I32_RET_TYPE_INDEX => {
+                    self.output.push(op::TYPE_FUNC);
+                    self.output.push(4);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(op::TYPE_I64);
+                    self.output.push(op::TYPE_I64);
+                    self.output.push(op::TYPE_I32);
+                    self.output.push(1);
+                    self.output.push(op::TYPE_I32);
+                },
+            }
+        }
 
         let new_len = self.output.len();
         let size = (new_len - 1) - idx_section_size;
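Each arm in the loop above emits one entry of the wasm type section: the functype marker, a parameter count followed by the parameter value types, then a result count followed by the result types. Assuming the op:: constants match the standard wasm binary encoding (TYPE_FUNC = 0x60, TYPE_I32 = 0x7F), the FN1_RET_TYPE_INDEX arm comes out as the five bytes below; a sketch, not taken from the patch:

// Sketch: the bytes the FN1_RET_TYPE_INDEX arm is expected to append, i.e. the
// encoding of (func (param i32) (result i32)), assuming the usual wasm opcode values.
fn fn1_ret_type_entry() -> Vec<u8> {
    vec![
        0x60, // op::TYPE_FUNC (assumed value)
        0x01, // one parameter
        0x7F, // op::TYPE_I32 (assumed value)
        0x01, // one result
        0x7F, // op::TYPE_I32
    ]
}

fn main() {
    assert_eq!(fn1_ret_type_entry(), [0x60, 0x01, 0x7F, 0x01, 0x7F]);
}
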
@@ -393,13 +393,13 @@ impl WasmBuilder {
         self.set_import_table_size(new_table_size);
     }
 
-    pub fn write_import_entry(&mut self, fn_name: &str, type_index: u8) -> u16 {
+    fn write_import_entry(&mut self, fn_name: &str, type_index: FunctionType) -> u16 {
         self.output.push(1); // length of module name
         self.output.push('e' as u8); // module name
         self.output.push(fn_name.len().safe_to_u8());
         self.output.extend(fn_name.as_bytes());
         self.output.push(op::EXT_FUNCTION);
-        self.output.push(type_index);
+        self.output.push(type_index.to_u8());
 
         let new_import_count = self.import_count + 1;
         self.set_import_count(new_import_count);
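write_import_entry serializes one import record: a one-byte module-name length, the module name "e", the function-name length and bytes, the import kind, and finally the type index, which is the only byte affected by the switch to FunctionType. Assuming op::EXT_FUNCTION is the standard wasm function import kind (0x00), an entry for a hypothetical import named foo with FN0_TYPE_INDEX would look like this sketch:

// Sketch: expected bytes for write_import_entry("foo", FunctionType::FN0_TYPE_INDEX),
// assuming op::EXT_FUNCTION == 0x00 (the wasm "func" import kind).
fn foo_import_entry() -> Vec<u8> {
    let mut out = vec![1, b'e']; // module name: length 1, "e"
    out.push(3);                 // field name length
    out.extend(b"foo");          // field name
    out.push(0x00);              // op::EXT_FUNCTION (assumed value)
    out.push(0);                 // FunctionType::FN0_TYPE_INDEX.to_u8()
    out
}

fn main() {
    assert_eq!(foo_import_entry(), [1, b'e', 3, b'f', b'o', b'o', 0x00, 0]);
}
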
@@ -414,7 +414,7 @@ impl WasmBuilder {
         self.output.push(op::SC_FUNCTION);
         self.output.push(2); // length of this section
         self.output.push(1); // count of signature indices
-        self.output.push(FN1_TYPE_INDEX);
+        self.output.push(FunctionType::FN1_TYPE_INDEX.to_u8());
     }
 
     pub fn write_export_section(&mut self) {
@@ -435,7 +435,7 @@ impl WasmBuilder {
         write_fixed_leb16_at_idx(&mut self.output, next_op_idx, self.import_count - 1);
     }
 
-    pub fn get_fn_idx(&mut self, fn_name: &str, type_index: u8) -> u16 {
+    pub fn get_fn_idx(&mut self, fn_name: &str, type_index: FunctionType) -> u16 {
         match self.get_import_index(fn_name) {
             Some(idx) => idx,
             None => {
@@ -521,18 +521,6 @@ impl WasmBuilder {
         local
     }
 
-    //fn write_leb_i32(&mut self, v: i32) { write_leb_i32(self, v) }
-
-    //fn write_leb_u32(&mut self, v: u32) { write_leb_u32(self, v) }
-
-    //fn write_fixed_leb16_at_idx(&mut self, idx: usize, x: u16) {
-    //    write_fixed_leb16_at_idx(self, idx, x)
-    //}
-
-    //fn write_fixed_leb32_at_idx(&mut self, idx: usize, x: u32) {
-    //    write_fixed_leb32_at_idx(self, idx, x)
-    //}
-
     pub fn const_i32(&mut self, v: i32) {
         self.instruction_body.push(op::OP_I32CONST);
         write_leb_i32(&mut self.instruction_body, v);
@@ -583,32 +571,32 @@ impl WasmBuilder {
     pub fn call_fn(&mut self, fn_idx: u16) {
         self.instruction_body.push(op::OP_CALL);
         write_leb_u32(&mut self.instruction_body, fn_idx as u32);
-        //let buf_len = self.len();
-        //self.instruction_body.push(0);
-        //self.instruction_body.push(0);
-        //self.write_fixed_leb16_at_idx(buf_len, fn_idx);
     }
 
     pub fn eq_i32(&mut self) { self.instruction_body.push(op::OP_I32EQ); }
     pub fn ne_i32(&mut self) { self.instruction_body.push(op::OP_I32NE); }
     pub fn le_i32(&mut self) { self.instruction_body.push(op::OP_I32LES); }
-    //pub fn lt_i32(&mut self) { self.instruction_body.push(op::OP_I32LTS); }
-    //pub fn ge_i32(&mut self) { self.instruction_body.push(op::OP_I32GES); }
-    //pub fn gt_i32(&mut self) { self.instruction_body.push(op::OP_I32GTS); }
+    #[allow(dead_code)]
+    pub fn lt_i32(&mut self) { self.instruction_body.push(op::OP_I32LTS); }
+    #[allow(dead_code)]
+    pub fn ge_i32(&mut self) { self.instruction_body.push(op::OP_I32GES); }
+    #[allow(dead_code)]
+    pub fn gt_i32(&mut self) { self.instruction_body.push(op::OP_I32GTS); }
 
     pub fn if_i32(&mut self) {
         self.instruction_body.push(op::OP_IF);
         self.instruction_body.push(op::TYPE_I32);
     }
+    #[allow(dead_code)]
     pub fn if_i64(&mut self) {
         self.instruction_body.push(op::OP_IF);
         self.instruction_body.push(op::TYPE_I64);
     }
-
-    //pub fn block_i32(&mut self) {
-    //    self.instruction_body.push(op::OP_BLOCK);
-    //    self.instruction_body.push(op::TYPE_I32);
-    //}
+    #[allow(dead_code)]
+    pub fn block_i32(&mut self) {
+        self.instruction_body.push(op::OP_BLOCK);
+        self.instruction_body.push(op::TYPE_I32);
+    }
 
     pub fn xor_i32(&mut self) { self.instruction_body.push(op::OP_I32XOR); }
 
@@ -739,7 +727,7 @@ impl WasmBuilder {
 
     pub fn return_(&mut self) { self.instruction_body.push(op::OP_RETURN); }
 
-    //pub fn drop_(&mut self) { self.instruction_body.push(op::OP_DROP); }
+    pub fn drop_(&mut self) { self.instruction_body.push(op::OP_DROP); }
 
     // Generate a br_table where an input of [i] will branch to the [i]th outer block,
     // where [i] is passed on the wasm stack
@@ -757,6 +745,11 @@ impl WasmBuilder {
         write_leb_u32(&mut self.instruction_body, depth);
     }
 
+    pub fn br_if(&mut self, depth: u32) {
+        self.instruction_body.push(op::OP_BRIF);
+        write_leb_u32(&mut self.instruction_body, depth);
+    }
+
     pub fn get_local(&mut self, local: &WasmLocal) {
         self.instruction_body.push(op::OP_GETLOCAL);
         self.instruction_body.push(local.idx());
@@ -803,20 +796,20 @@ mod tests {
     fn import_table_management() {
         let mut w = WasmBuilder::new();
 
-        assert_eq!(0, w.get_fn_idx("foo", FN0_TYPE_INDEX));
-        assert_eq!(1, w.get_fn_idx("bar", FN1_TYPE_INDEX));
-        assert_eq!(0, w.get_fn_idx("foo", FN0_TYPE_INDEX));
-        assert_eq!(2, w.get_fn_idx("baz", FN2_TYPE_INDEX));
+        assert_eq!(0, w.get_fn_idx("foo", FunctionType::FN0_TYPE_INDEX));
+        assert_eq!(1, w.get_fn_idx("bar", FunctionType::FN1_TYPE_INDEX));
+        assert_eq!(0, w.get_fn_idx("foo", FunctionType::FN0_TYPE_INDEX));
+        assert_eq!(2, w.get_fn_idx("baz", FunctionType::FN2_TYPE_INDEX));
     }
 
     #[test]
     fn builder_test() {
         let mut m = WasmBuilder::new();
 
-        let mut foo_index = m.get_fn_idx("foo", FN0_TYPE_INDEX);
+        let mut foo_index = m.get_fn_idx("foo", FunctionType::FN0_TYPE_INDEX);
         m.call_fn(foo_index);
 
-        let bar_index = m.get_fn_idx("bar", FN0_TYPE_INDEX);
+        let bar_index = m.get_fn_idx("bar", FunctionType::FN0_TYPE_INDEX);
         m.call_fn(bar_index);
 
         let local0 = m.alloc_local(); // for ensuring that reset clears previous locals
@@ -827,9 +820,9 @@ mod tests {
 
         m.const_i32(2);
 
-        let baz_index = m.get_fn_idx("baz", FN1_RET_TYPE_INDEX);
+        let baz_index = m.get_fn_idx("baz", FunctionType::FN1_RET_TYPE_INDEX);
         m.call_fn(baz_index);
-        foo_index = m.get_fn_idx("foo", FN1_TYPE_INDEX);
+        foo_index = m.get_fn_idx("foo", FunctionType::FN1_TYPE_INDEX);
         m.call_fn(foo_index);
 
         m.const_i32(10);