Prechádzať zdrojové kódy

Optimise lazy flag handling

- remove last_add_result and last_op2 from arithmetic instructions
- optimise getcf
Fabian 3 rokov pred
rodič
commit
ee542c5f84

+ 3 - 11
src/cpu.js

@@ -100,13 +100,10 @@ function CPU(bus, wm)
     this.flags_changed = v86util.view(Int32Array, memory, 116, 1);
 
     /**
-     * the last 2 operators and the result and size of the last arithmetic operation
+     * enough information about the last arithmetic operation to compute eflags
      */
     this.last_op1 = v86util.view(Int32Array, memory, 96, 1);
-    this.last_op2 = v86util.view(Int32Array, memory, 100, 1);
     this.last_op_size = v86util.view(Int32Array, memory, 104, 1);
-
-    this.last_add_result = v86util.view(Int32Array, memory, 108, 1);
     this.last_result = v86util.view(Int32Array, memory, 112, 1);
 
     this.current_tsc = v86util.view(Uint32Array, memory, 960, 2); // 64 bit
@@ -392,9 +389,8 @@ CPU.prototype.get_state = function()
     state[26] = this.flags[0];
     state[27] = this.flags_changed[0];
     state[28] = this.last_op1[0];
-    state[29] = this.last_op2[0];
+
     state[30] = this.last_op_size[0];
-    state[31] = this.last_add_result[0];
 
     state[37] = this.instruction_pointer[0];
     state[38] = this.previous_ip[0];
@@ -488,9 +484,8 @@ CPU.prototype.set_state = function(state)
     this.flags[0] = state[26];
     this.flags_changed[0] = state[27];
     this.last_op1[0] = state[28];
-    this.last_op2[0] = state[29];
+
     this.last_op_size[0] = state[30];
-    this.last_add_result[0] = state[31];
 
     this.instruction_pointer[0] = state[37];
     this.previous_ip[0] = state[38];
@@ -713,11 +708,8 @@ CPU.prototype.reset = function()
 
     this.flags[0] = flags_default;
     this.flags_changed.fill(0);
-
     this.last_result.fill(0);
-    this.last_add_result.fill(0);
     this.last_op1.fill(0);
-    this.last_op2.fill(0);
     this.last_op_size.fill(0);
 
     this.set_tsc(0, 0);

+ 35 - 36
src/rust/codegen.rs

@@ -1313,18 +1313,6 @@ pub fn gen_set_last_op1(builder: &mut WasmBuilder, source: &WasmLocal) {
     builder.store_aligned_i32(0);
 }
 
-pub fn gen_set_last_op2(builder: &mut WasmBuilder, source: &LocalOrImmedate) {
-    builder.const_i32(global_pointers::LAST_OP2 as i32);
-    source.gen_get(builder);
-    builder.store_aligned_i32(0);
-}
-
-pub fn gen_set_last_add_result(builder: &mut WasmBuilder, source: &WasmLocal) {
-    builder.const_i32(global_pointers::LAST_ADD_RESULT as i32);
-    builder.get_local(&source);
-    builder.store_aligned_i32(0);
-}
-
 pub fn gen_set_last_result(builder: &mut WasmBuilder, source: &WasmLocal) {
     builder.const_i32(global_pointers::LAST_RESULT as i32);
     builder.get_local(&source);
@@ -1388,40 +1376,34 @@ pub fn gen_getzf(builder: &mut WasmBuilder) {
 
 pub fn gen_getcf(builder: &mut WasmBuilder) {
     builder.load_aligned_i32(global_pointers::FLAGS_CHANGED);
+    let flags_changed = builder.tee_new_local();
     builder.const_i32(FLAG_CARRY);
     builder.and_i32();
     builder.if_i32();
 
-    builder.load_aligned_i32(global_pointers::LAST_OP1);
-    let last_op1 = builder.tee_new_local();
-
-    builder.load_aligned_i32(global_pointers::LAST_OP2);
-    let last_op2 = builder.tee_new_local();
-
-    builder.xor_i32();
+    builder.get_local(&flags_changed);
+    builder.const_i32(31);
+    builder.shr_s_i32();
+    builder.free_local(flags_changed);
+    let sub_mask = builder.set_new_local();
 
-    builder.get_local(&last_op2);
-    builder.load_aligned_i32(global_pointers::LAST_ADD_RESULT);
+    builder.load_aligned_i32(global_pointers::LAST_RESULT);
+    builder.get_local(&sub_mask);
     builder.xor_i32();
 
-    builder.and_i32();
-
-    builder.get_local(&last_op1);
+    builder.load_aligned_i32(global_pointers::LAST_OP1);
+    builder.get_local(&sub_mask);
     builder.xor_i32();
 
-    builder.free_local(last_op1);
-    builder.free_local(last_op2);
-
-    builder.load_aligned_i32(global_pointers::LAST_OP_SIZE);
-    builder.shr_u_i32();
-    builder.const_i32(1);
-    builder.and_i32();
+    builder.ltu_i32();
 
     builder.else_();
     builder.load_aligned_i32(global_pointers::FLAGS);
     builder.const_i32(FLAG_CARRY);
     builder.and_i32();
     builder.block_end();
+
+    builder.free_local(sub_mask);
 }
 
 pub fn gen_getsf(builder: &mut WasmBuilder) {
@@ -1447,22 +1429,37 @@ pub fn gen_getsf(builder: &mut WasmBuilder) {
 
 pub fn gen_getof(builder: &mut WasmBuilder) {
     builder.load_aligned_i32(global_pointers::FLAGS_CHANGED);
+    let flags_changed = builder.tee_new_local();
     builder.const_i32(FLAG_OVERFLOW);
     builder.and_i32();
     builder.if_i32();
     {
         builder.load_aligned_i32(global_pointers::LAST_OP1);
-        builder.load_aligned_i32(global_pointers::LAST_ADD_RESULT);
+        let last_op1 = builder.tee_new_local();
+        builder.load_aligned_i32(global_pointers::LAST_RESULT);
+        let last_result = builder.tee_new_local();
         builder.xor_i32();
-        builder.load_aligned_i32(global_pointers::LAST_OP2);
-        builder.load_aligned_i32(global_pointers::LAST_ADD_RESULT);
+
+        builder.get_local(&last_result);
+        builder.get_local(&last_op1);
+        builder.sub_i32();
+        builder.get_local(&flags_changed);
+        builder.const_i32(31);
+        builder.shr_u_i32();
+        builder.sub_i32();
+
+        builder.get_local(&last_result);
         builder.xor_i32();
+
         builder.and_i32();
 
         builder.load_aligned_i32(global_pointers::LAST_OP_SIZE);
         builder.shr_u_i32();
         builder.const_i32(1);
         builder.and_i32();
+
+        builder.free_local(last_op1);
+        builder.free_local(last_result);
     }
     builder.else_();
     {
@@ -1471,16 +1468,18 @@ pub fn gen_getof(builder: &mut WasmBuilder) {
         builder.and_i32();
     }
     builder.block_end();
+    builder.free_local(flags_changed);
 }
 
 pub fn gen_test_be(builder: &mut WasmBuilder) {
-    // TODO: Could be made lazy
+    // TODO: A more efficient implementation is possible
     gen_getcf(builder);
     gen_getzf(builder);
     builder.or_i32();
 }
 
 pub fn gen_test_l(builder: &mut WasmBuilder) {
+    // TODO: A more efficient implementation is possible
     gen_getsf(builder);
     builder.eqz_i32();
     gen_getof(builder);
@@ -1489,7 +1488,7 @@ pub fn gen_test_l(builder: &mut WasmBuilder) {
 }
 
 pub fn gen_test_le(builder: &mut WasmBuilder) {
-    // TODO: Could be made lazy
+    // TODO: A more efficient implementation is possible
     gen_test_l(builder);
     gen_getzf(builder);
     builder.or_i32();

+ 33 - 44
src/rust/cpu2/arith.rs

@@ -7,11 +7,9 @@ pub fn int_log2(x: i32) -> i32 { 31 - x.leading_zeros() as i32 }
 
 #[no_mangle]
 pub unsafe fn add(dest_operand: i32, source_operand: i32, op_size: i32) -> i32 {
-    *last_op1 = dest_operand;
-    *last_op2 = source_operand;
     let res = dest_operand + source_operand;
-    *last_result = res;
-    *last_add_result = res;
+    *last_op1 = dest_operand;
+    *last_result = res & (2 << op_size) - 1;
     *last_op_size = op_size;
     *flags_changed = FLAGS_ALL;
     return res;
@@ -19,36 +17,40 @@ pub unsafe fn add(dest_operand: i32, source_operand: i32, op_size: i32) -> i32 {
 #[no_mangle]
 pub unsafe fn adc(dest_operand: i32, source_operand: i32, op_size: i32) -> i32 {
     let cf = getcf() as i32;
-    *last_op1 = dest_operand;
-    *last_op2 = source_operand;
     let res = dest_operand + source_operand + cf;
+    *last_op1 = dest_operand;
     *last_result = res;
-    *last_add_result = res;
     *last_op_size = op_size;
-    *flags_changed = FLAGS_ALL;
+    *flags_changed = FLAGS_ALL & !FLAG_CARRY & !FLAG_ADJUST & !FLAG_OVERFLOW;
+    *flags = *flags & !FLAG_CARRY & !FLAG_ADJUST & !FLAG_OVERFLOW
+        | (dest_operand ^ ((dest_operand ^ source_operand) & (source_operand ^ res))) >> op_size
+            & FLAG_CARRY
+        | (dest_operand ^ source_operand ^ res) & FLAG_ADJUST
+        | ((source_operand ^ res) & (dest_operand ^ res)) >> op_size << 11 & FLAG_OVERFLOW;
     return res;
 }
 #[no_mangle]
 pub unsafe fn sub(dest_operand: i32, source_operand: i32, op_size: i32) -> i32 {
-    *last_add_result = dest_operand;
-    *last_op2 = source_operand;
     let res = dest_operand - source_operand;
-    *last_result = res;
-    *last_op1 = res;
+    *last_op1 = dest_operand;
+    *last_result = res & (2 << op_size) - 1;
     *last_op_size = op_size;
-    *flags_changed = FLAGS_ALL;
+    *flags_changed = FLAGS_ALL | FLAG_SUB;
     return res;
 }
 #[no_mangle]
 pub unsafe fn sbb(dest_operand: i32, source_operand: i32, op_size: i32) -> i32 {
     let cf = getcf() as i32;
-    *last_add_result = dest_operand;
-    *last_op2 = source_operand;
     let res = dest_operand - source_operand - cf;
+    *last_op1 = dest_operand;
     *last_result = res;
-    *last_op1 = res;
     *last_op_size = op_size;
-    *flags_changed = FLAGS_ALL;
+    *flags_changed = FLAGS_ALL & !FLAG_CARRY & !FLAG_ADJUST & !FLAG_OVERFLOW | FLAG_SUB;
+    *flags = *flags & !FLAG_CARRY & !FLAG_ADJUST & !FLAG_OVERFLOW
+        | (res ^ ((res ^ source_operand) & (source_operand ^ dest_operand))) >> op_size
+            & FLAG_CARRY
+        | (dest_operand ^ source_operand ^ res) & FLAG_ADJUST
+        | ((source_operand ^ dest_operand) & (res ^ dest_operand)) >> op_size << 11 & FLAG_OVERFLOW;
     return res;
 }
 #[no_mangle]
@@ -84,11 +86,9 @@ pub unsafe fn cmp32(x: i32, y: i32) { sub(x, y, OPSIZE_32); }
 #[no_mangle]
 pub unsafe fn inc(dest_operand: i32, op_size: i32) -> i32 {
     *flags = *flags & !1 | getcf() as i32;
-    *last_op1 = dest_operand;
-    *last_op2 = 1;
     let res = dest_operand + 1;
-    *last_result = res;
-    *last_add_result = res;
+    *last_op1 = dest_operand;
+    *last_result = res & (2 << op_size) - 1;
     *last_op_size = op_size;
     *flags_changed = FLAGS_ALL & !1;
     return res;
@@ -96,13 +96,11 @@ pub unsafe fn inc(dest_operand: i32, op_size: i32) -> i32 {
 #[no_mangle]
 pub unsafe fn dec(dest_operand: i32, op_size: i32) -> i32 {
     *flags = *flags & !1 | getcf() as i32;
-    *last_add_result = dest_operand;
-    *last_op2 = 1;
     let res = dest_operand - 1;
-    *last_result = res;
-    *last_op1 = res;
+    *last_op1 = dest_operand;
+    *last_result = res & (2 << op_size) - 1;
     *last_op_size = op_size;
-    *flags_changed = FLAGS_ALL & !1;
+    *flags_changed = FLAGS_ALL & !1 | FLAG_SUB;
     return res;
 }
 #[no_mangle]
@@ -126,16 +124,7 @@ pub unsafe fn not16(x: i32) -> i32 { return !x; }
 pub unsafe fn not32(x: i32) -> i32 { return !x; }
 
 #[no_mangle]
-pub unsafe fn neg(dest_operand: i32, op_size: i32) -> i32 {
-    let res = -dest_operand;
-    *last_result = res;
-    *last_op1 = res;
-    *flags_changed = FLAGS_ALL;
-    *last_add_result = 0;
-    *last_op2 = dest_operand;
-    *last_op_size = op_size;
-    return res;
-}
+pub unsafe fn neg(dest_operand: i32, op_size: i32) -> i32 { sub(0, dest_operand, op_size) }
 #[no_mangle]
 pub unsafe fn neg8(x: i32) -> i32 { return neg(x, OPSIZE_8); }
 #[no_mangle]
@@ -340,8 +329,6 @@ pub unsafe fn bcd_daa() {
     }
     *last_result = *reg8.offset(AL as isize) as i32;
     *last_op_size = OPSIZE_8;
-    *last_op2 = 0;
-    *last_op1 = *last_op2;
     *flags_changed = FLAGS_ALL & !1 & !FLAG_ADJUST & !FLAG_OVERFLOW;
 }
 #[no_mangle]
@@ -363,8 +350,6 @@ pub unsafe fn bcd_das() {
     }
     *last_result = *reg8.offset(AL as isize) as i32;
     *last_op_size = OPSIZE_8;
-    *last_op2 = 0;
-    *last_op1 = *last_op2;
     *flags_changed = FLAGS_ALL & !1 & !FLAG_ADJUST & !FLAG_OVERFLOW;
 }
 #[no_mangle]
@@ -1107,7 +1092,8 @@ pub unsafe fn bts_mem(virt_addr: i32, mut bit_offset: i32) {
 }
 #[no_mangle]
 pub unsafe fn bsf16(old: i32, bit_base: i32) -> i32 {
-    *flags_changed = FLAGS_ALL & !FLAG_ZERO;
+    *flags_changed = FLAGS_ALL & !FLAG_ZERO & !FLAG_CARRY;
+    *flags &= !FLAG_CARRY;
     *last_op_size = OPSIZE_16;
     if bit_base == 0 {
         *flags |= FLAG_ZERO;
@@ -1123,7 +1109,8 @@ pub unsafe fn bsf16(old: i32, bit_base: i32) -> i32 {
 }
 #[no_mangle]
 pub unsafe fn bsf32(old: i32, bit_base: i32) -> i32 {
-    *flags_changed = FLAGS_ALL & !FLAG_ZERO;
+    *flags_changed = FLAGS_ALL & !FLAG_ZERO & !FLAG_CARRY;
+    *flags &= !FLAG_CARRY;
     *last_op_size = OPSIZE_32;
     if bit_base == 0 {
         *flags |= FLAG_ZERO;
@@ -1138,7 +1125,8 @@ pub unsafe fn bsf32(old: i32, bit_base: i32) -> i32 {
 }
 #[no_mangle]
 pub unsafe fn bsr16(old: i32, bit_base: i32) -> i32 {
-    *flags_changed = FLAGS_ALL & !FLAG_ZERO;
+    *flags_changed = FLAGS_ALL & !FLAG_ZERO & !FLAG_CARRY;
+    *flags &= !FLAG_CARRY;
     *last_op_size = OPSIZE_16;
     if bit_base == 0 {
         *flags |= FLAG_ZERO;
@@ -1153,7 +1141,8 @@ pub unsafe fn bsr16(old: i32, bit_base: i32) -> i32 {
 }
 #[no_mangle]
 pub unsafe fn bsr32(old: i32, bit_base: i32) -> i32 {
-    *flags_changed = FLAGS_ALL & !FLAG_ZERO;
+    *flags_changed = FLAGS_ALL & !FLAG_ZERO & !FLAG_CARRY;
+    *flags &= !FLAG_CARRY;
     *last_op_size = OPSIZE_32;
     if bit_base == 0 {
         *flags |= FLAG_ZERO;

+ 1 - 0
src/rust/cpu2/cpu.rs

@@ -74,6 +74,7 @@ pub const CHECK_MISSED_ENTRY_POINTS: bool = false;
 
 pub const INTERPRETER_ITERATION_LIMIT: u32 = 1000;
 
+pub const FLAG_SUB: i32 = -0x8000_0000;
 pub const FLAG_CARRY: i32 = 1;
 pub const FLAG_PARITY: i32 = 4;
 pub const FLAG_ADJUST: i32 = 16;

+ 3 - 2
src/rust/cpu2/global_pointers.rs

@@ -7,10 +7,11 @@ pub const reg16: *mut u16 = 64 as *mut u16;
 pub const reg8s: *mut i8 = 64 as *mut i8;
 pub const reg16s: *mut i16 = 64 as *mut i16;
 pub const reg32: *mut i32 = 64 as *mut i32;
+
 pub const last_op1: *mut i32 = 96 as *mut i32;
-pub const last_op2: *mut i32 = 100 as *mut i32;
+
 pub const last_op_size: *mut i32 = 104 as *mut i32;
-pub const last_add_result: *mut i32 = 108 as *mut i32;
+
 pub const last_result: *mut i32 = 112 as *mut i32;
 pub const flags_changed: *mut i32 = 116 as *mut i32;
 pub const flags: *mut i32 = 120 as *mut i32;

+ 18 - 7
src/rust/cpu2/misc_instr.rs

@@ -5,10 +5,15 @@ use paging::OrPageFault;
 
 pub unsafe fn getcf() -> bool {
     if 0 != *flags_changed & 1 {
-        return 0
-            != (*last_op1 ^ (*last_op1 ^ *last_op2) & (*last_op2 ^ *last_add_result))
-                >> *last_op_size
-                & 1;
+        let m = (2 << *last_op_size) - 1;
+        dbg_assert!((*last_op1 as u32) <= m);
+        dbg_assert!((*last_result as u32) <= m);
+
+        let sub_mask = *flags_changed >> 31;
+
+        // sub: last_op1 < last_result  (or last_op1 < last_op2) (or (result ^ ((result ^ b) & (b ^ a))))
+        // add: last_result < last_op1  (or last_result < last_op2) (or a ^ ((a ^ b) & (b ^ result)))
+        return ((*last_result as i32 ^ sub_mask) as u32) < (*last_op1 ^ sub_mask) as u32;
     }
     else {
         return 0 != *flags & 1;
@@ -26,7 +31,9 @@ pub unsafe fn getpf() -> bool {
 }
 pub unsafe fn getaf() -> bool {
     if 0 != *flags_changed & FLAG_ADJUST {
-        return 0 != (*last_op1 ^ *last_op2 ^ *last_add_result) & FLAG_ADJUST;
+        let is_sub = *flags_changed & FLAG_SUB != 0;
+        let last_op2 = (*last_result - *last_op1) * if is_sub { -1 } else { 1 };
+        return 0 != (*last_op1 ^ last_op2 ^ *last_result) & FLAG_ADJUST;
     }
     else {
         return 0 != *flags & FLAG_ADJUST;
@@ -50,9 +57,13 @@ pub unsafe fn getsf() -> bool {
 }
 pub unsafe fn getof() -> bool {
     if 0 != *flags_changed & FLAG_OVERFLOW {
+        let is_sub = (*flags_changed as u32) >> 31;
+
+        // add: (a ^ result) & (b ^ result)
+        // sub: (a ^ result) & (!b ^ result), since (result - a) - 1 == !b (or (a ^ b) & (result ^ a))
+        let b_xor_1_if_sub = (*last_result - *last_op1) - is_sub as i32;
         return 0
-            != ((*last_op1 ^ *last_add_result) & (*last_op2 ^ *last_add_result)) >> *last_op_size
-                & 1;
+            != ((*last_op1 ^ *last_result) & (b_xor_1_if_sub ^ *last_result)) >> *last_op_size & 1;
     }
     else {
         return 0 != *flags & FLAG_OVERFLOW;

+ 0 - 2
src/rust/global_pointers.rs

@@ -1,8 +1,6 @@
 pub const REG: u32 = 64;
 pub const LAST_OP1: u32 = 96;
-pub const LAST_OP2: u32 = 100;
 pub const LAST_OP_SIZE: u32 = 104;
-pub const LAST_ADD_RESULT: u32 = 108;
 pub const LAST_RESULT: u32 = 112;
 pub const FLAGS_CHANGED: u32 = 116;
 pub const FLAGS: u32 = 120;

+ 44 - 28
src/rust/jit_instructions.rs

@@ -4,7 +4,7 @@ use codegen;
 use cpu::BitSize;
 use cpu2::cpu::{
     FLAGS_ALL, FLAGS_DEFAULT, FLAGS_MASK, FLAG_ADJUST, FLAG_CARRY, FLAG_DIRECTION, FLAG_OVERFLOW,
-    OPSIZE_8, OPSIZE_16, OPSIZE_32,
+    FLAG_SUB, OPSIZE_8, OPSIZE_16, OPSIZE_32,
 };
 use global_pointers;
 use jit::JitContext;
@@ -849,8 +849,6 @@ pub fn gen_add32(
     builder.add_i32();
     builder.set_local(dest_operand);
 
-    codegen::gen_set_last_op2(builder, &source_operand);
-    codegen::gen_set_last_add_result(builder, &dest_operand);
     codegen::gen_set_last_result(builder, &dest_operand);
     codegen::gen_set_last_op_size(builder, OPSIZE_32);
     codegen::gen_set_flags_changed(builder, FLAGS_ALL);
@@ -861,18 +859,16 @@ pub fn gen_sub32(
     dest_operand: &WasmLocal,
     source_operand: &LocalOrImmedate,
 ) {
-    codegen::gen_set_last_add_result(builder, &dest_operand);
+    codegen::gen_set_last_op1(builder, &dest_operand);
 
     builder.get_local(&dest_operand);
     source_operand.gen_get(builder);
     builder.sub_i32();
     builder.set_local(dest_operand);
 
-    codegen::gen_set_last_op1(builder, &dest_operand);
-    codegen::gen_set_last_op2(builder, &source_operand);
     codegen::gen_set_last_result(builder, &dest_operand);
     codegen::gen_set_last_op_size(builder, OPSIZE_32);
-    codegen::gen_set_flags_changed(builder, FLAGS_ALL);
+    codegen::gen_set_flags_changed(builder, FLAGS_ALL | FLAG_SUB);
 }
 
 pub fn gen_cmp(
@@ -881,19 +877,25 @@ pub fn gen_cmp(
     source_operand: &LocalOrImmedate,
     size: i32,
 ) {
+    builder.const_i32(global_pointers::LAST_RESULT as i32);
     builder.get_local(&dest_operand);
     source_operand.gen_get(builder);
     builder.sub_i32();
-    let result = builder.set_new_local();
+    if size == OPSIZE_8 || size == OPSIZE_16 {
+        builder.const_i32(if size == OPSIZE_8 { 0xFF } else { 0xFFFF });
+        builder.and_i32();
+    }
+    builder.store_aligned_i32(0);
 
-    codegen::gen_set_last_op1(builder, &result);
-    codegen::gen_set_last_op2(builder, &source_operand);
-    codegen::gen_set_last_add_result(builder, dest_operand);
-    codegen::gen_set_last_result(builder, &result);
+    builder.const_i32(global_pointers::LAST_OP1 as i32);
+    builder.get_local(&dest_operand);
+    if size == OPSIZE_8 || size == OPSIZE_16 {
+        builder.const_i32(if size == OPSIZE_8 { 0xFF } else { 0xFFFF });
+        builder.and_i32();
+    }
+    builder.store_aligned_i32(0);
     codegen::gen_set_last_op_size(builder, size);
-    codegen::gen_set_flags_changed(builder, FLAGS_ALL);
-
-    builder.free_local(result);
+    codegen::gen_set_flags_changed(builder, FLAGS_ALL | FLAG_SUB);
 }
 pub fn gen_cmp8(builder: &mut WasmBuilder, dest: &WasmLocal, source: &LocalOrImmedate) {
     gen_cmp(builder, dest, source, OPSIZE_8)
@@ -1302,9 +1304,12 @@ fn gen_inc(builder: &mut WasmBuilder, dest_operand: &WasmLocal, size: i32) {
     builder.or_i32();
     builder.store_aligned_i32(0);
 
-    codegen::gen_set_last_op1(builder, dest_operand);
-    builder.const_i32(global_pointers::LAST_OP2 as i32);
-    builder.const_i32(1);
+    builder.const_i32(global_pointers::LAST_OP1 as i32);
+    builder.get_local(&dest_operand);
+    if size == OPSIZE_8 || size == OPSIZE_16 {
+        builder.const_i32(if size == OPSIZE_8 { 0xFF } else { 0xFFFF });
+        builder.and_i32();
+    }
     builder.store_aligned_i32(0);
 
     builder.get_local(dest_operand);
@@ -1317,9 +1322,13 @@ fn gen_inc(builder: &mut WasmBuilder, dest_operand: &WasmLocal, size: i32) {
         builder.set_local(dest_operand);
     }
 
-    codegen::gen_set_last_add_result(builder, dest_operand);
-    codegen::gen_set_last_result(builder, dest_operand);
-
+    builder.const_i32(global_pointers::LAST_RESULT as i32);
+    builder.get_local(&dest_operand);
+    if size == OPSIZE_16 {
+        builder.const_i32(0xFFFF);
+        builder.and_i32();
+    }
+    builder.store_aligned_i32(0);
     codegen::gen_set_last_op_size(builder, size);
     codegen::gen_set_flags_changed(builder, FLAGS_ALL & !1);
 }
@@ -1339,9 +1348,12 @@ fn gen_dec(builder: &mut WasmBuilder, dest_operand: &WasmLocal, size: i32) {
     builder.or_i32();
     builder.store_aligned_i32(0);
 
-    codegen::gen_set_last_add_result(builder, dest_operand);
-    builder.const_i32(global_pointers::LAST_OP2 as i32);
-    builder.const_i32(1);
+    builder.const_i32(global_pointers::LAST_OP1 as i32);
+    builder.get_local(&dest_operand);
+    if size == OPSIZE_8 || size == OPSIZE_16 {
+        builder.const_i32(if size == OPSIZE_8 { 0xFF } else { 0xFFFF });
+        builder.and_i32();
+    }
     builder.store_aligned_i32(0);
 
     builder.get_local(dest_operand);
@@ -1354,11 +1366,15 @@ fn gen_dec(builder: &mut WasmBuilder, dest_operand: &WasmLocal, size: i32) {
         builder.set_local(dest_operand);
     }
 
-    codegen::gen_set_last_op1(builder, dest_operand);
-    codegen::gen_set_last_result(builder, dest_operand);
-
+    builder.const_i32(global_pointers::LAST_RESULT as i32);
+    builder.get_local(&dest_operand);
+    if size == OPSIZE_16 {
+        builder.const_i32(0xFFFF);
+        builder.and_i32();
+    }
+    builder.store_aligned_i32(0);
     codegen::gen_set_last_op_size(builder, size);
-    codegen::gen_set_flags_changed(builder, FLAGS_ALL & !1);
+    codegen::gen_set_flags_changed(builder, FLAGS_ALL & !1 | FLAG_SUB);
 }
 fn gen_dec16(builder: &mut WasmBuilder, dest_operand: &WasmLocal) {
     gen_dec(builder, dest_operand, OPSIZE_16)

+ 2 - 0
src/rust/wasmgen/wasm_builder.rs

@@ -583,6 +583,8 @@ impl WasmBuilder {
     #[allow(dead_code)]
     pub fn gt_i32(&mut self) { self.instruction_body.push(op::OP_I32GTS); }
 
+    pub fn ltu_i32(&mut self) { self.instruction_body.push(op::OP_I32LTU); }
+
     pub fn if_i32(&mut self) {
         self.instruction_body.push(op::OP_IF);
         self.instruction_body.push(op::TYPE_I32);