Browse Source

Port jit to Rust

The following files and functions were ported:
- jit.c
- codegen.c
- _jit functions in instructions*.c and misc_instr.c
- generate_{analyzer,jit}.js (now produce Rust code)
- jit_* from cpu.c

And the following data structures:
- hot_code_addresses
- wasm_table_index_free_list
- entry_points
- jit_cache_array
- page_first_jit_cache_entry

Other miscellaneous changes:
- Page is an abstract type
- Addresses, locals and bitflags are unsigned
- Make the number of entry points a growable type
- Avoid use of global state wherever possible
- Delete string packing
- Make CachedStateFlags abstract
- Make AnalysisType a product type
- Make BasicBlockType a product type
- Restore opcode assertion
- Set opt-level=2 in debug mode (for test performance)
- Delete JIT_ALWAYS instrumentation (now possible via api)
- Refactor generate_analyzer.js
- Refactor generate_jit.js
Fabian 5 years ago
parent
commit
3a8d644d75
67 changed files with 4143 additions and 4421 deletions
  1. 0 1
      .rustfmt.toml
  2. 1 0
      Cargo.toml
  3. 16 27
      Makefile
  4. 179 279
      gen/generate_analyzer.js
  5. 265 314
      gen/generate_jit.js
  6. 88 0
      gen/rust_ast.js
  7. 10 0
      gen/util.js
  8. 23 18
      gen/x86_table.js
  9. 4 1
      src/browser/lib.js
  10. 19 19
      src/browser/print_stats.js
  11. 26 60
      src/browser/starter.js
  12. 1 1
      src/config.js
  13. 1 1
      src/const.js
  14. 29 16
      src/cpu.js
  15. 1 1
      src/memory.js
  16. 0 678
      src/native/codegen/codegen.c
  17. 0 87
      src/native/codegen/codegen.h
  18. 0 74
      src/native/codegen/wasmgen.h
  19. 0 17
      src/native/config.h
  20. 0 4
      src/native/const.h
  21. 204 1398
      src/native/cpu.c
  22. 4 104
      src/native/cpu.h
  23. 6 10
      src/native/global_pointers.h
  24. 0 274
      src/native/instructions.c
  25. 1 16
      src/native/instructions.h
  26. 2 52
      src/native/instructions_0f.c
  27. 0 2
      src/native/instructions_0f.h
  28. 0 377
      src/native/jit.c
  29. 0 12
      src/native/jit.h
  30. 0 4
      src/native/js_imports.h
  31. 1 1
      src/native/memory.c
  32. 0 101
      src/native/misc_instr.c
  33. 4 8
      src/native/misc_instr.h
  34. 0 82
      src/native/profiler/opstats.c
  35. 0 5
      src/native/profiler/opstats.h
  36. 1 1
      src/native/profiler/profiler.h
  37. 17 0
      src/native/rust_imports.h
  38. 0 9
      src/native/shared.h
  39. 96 0
      src/rust/analysis.rs
  40. 132 0
      src/rust/c_api.rs
  41. 382 0
      src/rust/codegen.rs
  42. 50 0
      src/rust/cpu.rs
  43. 74 0
      src/rust/cpu_context.rs
  44. 12 8
      src/rust/dbg.rs
  45. 7 0
      src/rust/gen/mod.rs
  46. 18 0
      src/rust/global_pointers.rs
  47. 1360 0
      src/rust/jit.rs
  48. 396 0
      src/rust/jit_instructions.rs
  49. 52 0
      src/rust/leb.rs
  50. 20 3
      src/rust/lib.rs
  51. 345 0
      src/rust/modrm.rs
  52. 49 0
      src/rust/opstats.rs
  53. 8 0
      src/rust/page.rs
  54. 14 0
      src/rust/prefix.rs
  55. 63 0
      src/rust/profiler.rs
  56. 25 0
      src/rust/regs.rs
  57. 19 0
      src/rust/state_flags.rs
  58. 6 0
      src/rust/tlb.rs
  59. 0 105
      src/rust/util.rs
  60. 0 95
      src/rust/wasmgen/c_api.rs
  61. 1 3
      src/rust/wasmgen/mod.rs
  62. 46 33
      src/rust/wasmgen/module_init.rs
  63. 58 113
      src/rust/wasmgen/wasm_util.rs
  64. 1 1
      tests/expect/run.js
  65. 4 4
      tests/expect/tests/mov32-mem.wast
  66. 1 1
      tests/nasm/run.js
  67. 1 1
      tests/rust/verify-wasmgen-dummy-output.js

+ 0 - 1
.rustfmt.toml

@@ -1,4 +1,3 @@
-normalize_comments = true
 use_field_init_shorthand = true
 match_block_trailing_comma = true
 fn_single_line = true

+ 1 - 0
Cargo.toml

@@ -12,6 +12,7 @@ path = "src/rust/lib.rs"
 
 [profile.dev]
 lto = false
+opt-level=2
 panic = "abort"
 
 [profile.release]

+ 16 - 27
Makefile

@@ -4,9 +4,9 @@ BROWSER=chromium
 NASM_TEST_DIR=./tests/nasm
 COVERAGE_DIR=./tests/coverage
 
-INSTRUCTION_TABLES=build/jit.c build/jit0f_16.c build/jit0f_32.c \
+INSTRUCTION_TABLES=src/rust/gen/jit.rs src/rust/gen/jit0f_16.rs src/rust/gen/jit0f_32.rs \
 		   build/interpreter.c build/interpreter0f_16.c build/interpreter0f_32.c \
-		   build/analyzer.c build/analyzer0f_16.c build/analyzer0f_32.c \
+		   src/rust/gen/analyzer.rs src/rust/gen/analyzer0f_16.rs src/rust/gen/analyzer0f_32.rs \
 
 # Only the dependencies common to both generate_{jit,interpreter}.js
 GEN_DEPENDENCIES=$(filter-out gen/generate_interpreter.js gen/generate_jit.js gen/generate_analyzer.js, $(wildcard gen/*.js))
@@ -19,10 +19,6 @@ ifeq ($(ENABLE_COV), 1)
 CC_COVERAGE_FLAGS=--coverage -fprofile-instr-generate
 endif
 
-ifeq ($(JIT_ALWAYS),)
-JIT_ALWAYS=false
-endif
-
 all: build/v86_all.js build/libv86.js build/v86.wasm build/v86oxide.wasm
 all-debug: build/libv86-debug.js build/v86-debug.wasm build/v86oxide-debug.wasm
 browser: build/v86_all.js
@@ -118,7 +114,9 @@ BROWSER_FILES=screen.js \
 		  keyboard.js mouse.js serial.js \
 		  network.js lib.js starter.js worker_bus.js dummy_screen.js print_stats.js
 
-RUST_FILES=$(shell find src/rust/ -name '*.rs')
+RUST_FILES=$(shell find src/rust/ -name '*.rs') \
+	   src/rust/gen/jit.rs src/rust/gen/jit0f_16.rs src/rust/gen/jit0f_32.rs \
+	   src/rust/gen/analyzer.rs src/rust/gen/analyzer0f_16.rs src/rust/gen/analyzer0f_32.rs
 
 CORE_FILES:=$(addprefix src/,$(CORE_FILES))
 LIB_FILES:=$(addprefix lib/,$(LIB_FILES))
@@ -177,11 +175,11 @@ build/libv86-debug.js: $(CLOSURE) src/*.js lib/*.js src/browser/*.js
 .PHONY: instruction_tables
 instruction_tables: $(INSTRUCTION_TABLES)
 
-build/jit.c: $(JIT_DEPENDENCIES)
+src/rust/gen/jit.rs: $(JIT_DEPENDENCIES)
 	./gen/generate_jit.js --output-dir build/ --table jit
-build/jit0f_16.c: $(JIT_DEPENDENCIES)
+src/rust/gen/jit0f_16.rs: $(JIT_DEPENDENCIES)
 	./gen/generate_jit.js --output-dir build/ --table jit0f_16
-build/jit0f_32.c: $(JIT_DEPENDENCIES)
+src/rust/gen/jit0f_32.rs: $(JIT_DEPENDENCIES)
 	./gen/generate_jit.js --output-dir build/ --table jit0f_32
 
 build/interpreter.c: $(INTERPRETER_DEPENDENCIES)
@@ -191,40 +189,32 @@ build/interpreter0f_16.c: $(INTERPRETER_DEPENDENCIES)
 build/interpreter0f_32.c: $(INTERPRETER_DEPENDENCIES)
 	./gen/generate_interpreter.js --output-dir build/ --table interpreter0f_32
 
-build/analyzer.c: $(ANALYZER_DEPENDENCIES)
+src/rust/gen/analyzer.rs: $(ANALYZER_DEPENDENCIES)
 	./gen/generate_analyzer.js --output-dir build/ --table analyzer
-build/analyzer0f_16.c: $(ANALYZER_DEPENDENCIES)
+src/rust/gen/analyzer0f_16.rs: $(ANALYZER_DEPENDENCIES)
 	./gen/generate_analyzer.js --output-dir build/ --table analyzer0f_16
-build/analyzer0f_32.c: $(ANALYZER_DEPENDENCIES)
+src/rust/gen/analyzer0f_32.rs: $(ANALYZER_DEPENDENCIES)
 	./gen/generate_analyzer.js --output-dir build/ --table analyzer0f_32
 
-.PHONY: phony
-build/JIT_ALWAYS: phony
-	@if [[ `cat build/JIT_ALWAYS 2>&1` != '$(JIT_ALWAYS)' ]]; then \
-	    echo -n $(JIT_ALWAYS) > build/JIT_ALWAYS ; \
-	fi
-
-build/v86.wasm: src/native/*.c src/native/*.h src/native/codegen/*.c src/native/codegen/*.h src/native/profiler/* src/native/*.ll $(INSTRUCTION_TABLES) build/JIT_ALWAYS
+build/v86.wasm: src/native/*.c src/native/*.h src/native/profiler/* src/native/*.ll $(INSTRUCTION_TABLES)
 	mkdir -p build
 	-ls -lh build/v86.wasm
-	emcc src/native/*.c src/native/profiler/*.c src/native/codegen/codegen.c src/native/*.ll \
+	emcc src/native/*.c src/native/profiler/*.c src/native/*.ll \
 		$(CC_FLAGS) \
 		-DDEBUG=false \
 		-DNDEBUG \
-		-D"ENABLE_JIT_ALWAYS=$(JIT_ALWAYS)" \
 		-O3 \
 		--llvm-opts 3 \
 		--llvm-lto 3 \
 		-o build/v86.wasm
 	ls -lh build/v86.wasm
 
-build/v86-debug.wasm: src/native/*.c src/native/*.h src/native/codegen/*.c src/native/codegen/*.h src/native/profiler/* src/native/*.ll $(INSTRUCTION_TABLES) build/JIT_ALWAYS
+build/v86-debug.wasm: src/native/*.c src/native/*.h src/native/profiler/* src/native/*.ll $(INSTRUCTION_TABLES)
 	mkdir -p build/coverage
 	-ls -lh build/v86-debug.wasm
-	emcc src/native/*.c src/native/profiler/*.c src/native/codegen/codegen.c src/native/*.ll \
+	emcc src/native/*.c src/native/profiler/*.c src/native/*.ll \
 		$(CC_FLAGS) \
 		$(CC_COVERAGE_FLAGS) \
-		-D"ENABLE_JIT_ALWAYS=$(JIT_ALWAYS)" \
 		-Os \
 		-o build/v86-debug.wasm
 	ls -lh build/v86-debug.wasm
@@ -317,7 +307,7 @@ expect-tests: all-debug build/libwabt.js
 devices-test: all-debug
 	./tests/devices/virtio_9p.js
 
-rust-test:
+rust-test: $(RUST_FILES)
 	env RUST_BACKTRACE=full RUST_TEST_THREADS=1 cargo test -- --nocapture
 	./tests/rust/verify-wasmgen-dummy-output.js
 
@@ -348,5 +338,4 @@ clang-tidy:
 	clang-tidy \
 	     src/native/*.c src/native/*.h \
 	     src/native/profiler/*.c src/native/profiler/*.h \
-	     src/native/codegen/*.c src/native/codegen/*.h \
 	     -- -I src/native/ -Wall -Wno-bitwise-op-parentheses -Wno-gnu-binary-literal

+ 179 - 279
gen/generate_analyzer.js

@@ -3,13 +3,11 @@
 
 const fs = require("fs");
 const path = require("path");
-const encodings = require("./x86_table");
-const c_ast = require("./c_ast");
-const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
+const x86_table = require("./x86_table");
+const rust_ast = require("./rust_ast");
+const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
 
-const APPEND_NONFAULTING_FLAG = "analysis.flags |= JIT_INSTR_NONFAULTING_FLAG;";
-
-const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
+const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");
 
 mkdirpSync(OUT_DIR);
 
@@ -36,18 +34,18 @@ function gen_read_imm_call(op, size_variant)
     {
         if(op.imm8)
         {
-            return "read_imm8()";
+            return "cpu.read_imm8()";
         }
         else if(op.imm8s)
         {
-            return "read_imm8s()";
+            return "cpu.read_imm8s()";
         }
         else
         {
             if(op.immaddr)
             {
                 // immaddr: depends on address size
-                return "read_moffs()";
+                return "cpu.read_moffs()";
             }
             else
             {
@@ -55,12 +53,12 @@ function gen_read_imm_call(op, size_variant)
 
                 if(op.imm1632 && size === 16 || op.imm16)
                 {
-                    return "read_imm16()";
+                    return "cpu.read_imm16()";
                 }
                 else
                 {
                     console.assert(op.imm1632 && size === 32 || op.imm32);
-                    return "read_imm32s()";
+                    return "cpu.read_imm32()";
                 }
             }
         }
@@ -77,93 +75,97 @@ function gen_call(name, args)
     return `${name}(${args.join(", ")});`;
 }
 
-function gen_codegen_call(args)
-{
-    return args.map(arg => arg + ";");
-}
-
-function gen_codegen_call_modrm(args)
-{
-    args = args.map(arg => arg + ";");
-    return [].concat(gen_call("modrm_skip", ["modrm_byte"]), args);
-}
-
-function gen_modrm_mem_reg_split(mem_args, reg_args, postfixes={})
-{
-    const { mem_postfix=[], reg_postfix=[] } = postfixes;
-
-    return {
-        type: "if-else",
-        if_blocks: [{
-            condition: "modrm_byte < 0xC0",
-            body: []
-                .concat(gen_codegen_call_modrm(mem_args))
-                .concat(mem_postfix),
-        }],
-        else_block: {
-            body: gen_codegen_call(reg_args).concat(reg_postfix),
-        },
-    };
-}
-
 /*
  * Current naming scheme:
  * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
  */
 
-function make_instruction_name(encoding, size, prefix_variant)
+function make_instruction_name(encoding, size)
 {
     const suffix = encoding.os ? String(size) : "";
     const opcode_hex = hex(encoding.opcode & 0xFF, 2);
     const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
-    const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
+    const prefix = (encoding.opcode & 0xFF0000) === 0 ? "" : hex(encoding.opcode >> 16 & 0xFF, 2);
     const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
 
     return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
 }
 
-function get_nonfaulting_mem_reg_postfix(encoding)
-{
-    const lea_special_case = encoding.opcode === 0x8D;
-    const mem_postfix = (encoding.nonfaulting && lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
-    const reg_postfix = (encoding.nonfaulting && !lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
-
-    return {
-        mem_postfix,
-        reg_postfix,
-    };
-}
-
-function create_instruction_postfix(encoding)
-{
-    return [].concat(
-        encoding.block_boundary ? ["analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [],
-        encoding.no_next_instruction ? ["analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;"] : []
-    );
-}
-
 function gen_instruction_body(encodings, size)
 {
     const encoding = encodings[0];
 
-    let has_66 = false;
-    let has_F2 = false;
-    let has_F3 = false;
+    let has_66 = [];
+    let has_F2 = [];
+    let has_F3 = [];
+    let no_prefix = [];
 
     for(let e of encodings)
     {
-        if((e.opcode >>> 16) === 0x66) has_66 = true;
-        if((e.opcode >>> 16) === 0xF2) has_F2 = true;
-        if((e.opcode >>> 16) === 0xF3) has_F3 = true;
+        if((e.opcode >>> 16) === 0x66) has_66.push(e);
+        else if((e.opcode >>> 16) === 0xF2) has_F2.push(e);
+        else if((e.opcode >>> 16) === 0xF3) has_F3.push(e);
+        else no_prefix.push(e);
     }
 
-    if(has_66 || has_F2 || has_F3)
+    if(has_66.length || has_F2.length || has_F3.length)
     {
         console.assert((encoding.opcode & 0xFF00) === 0x0F00);
     }
 
+    const code = [];
+
+    if(encoding.e)
+    {
+        code.push("let modrm_byte = cpu.read_imm8();");
+    }
+
+    if(has_66.length || has_F2.length || has_F3.length)
+    {
+        const if_blocks = [];
+
+        if(has_66.length) {
+            const body = gen_instruction_body_after_prefix(has_66, size);
+            if_blocks.push({ condition: "cpu.prefixes & ::prefix::PREFIX_66 != 0", body, });
+        }
+        if(has_F2.length) {
+            const body = gen_instruction_body_after_prefix(has_F2, size);
+            if_blocks.push({ condition: "cpu.prefixes & ::prefix::PREFIX_F2 != 0", body, });
+        }
+        if(has_F3.length) {
+            const body = gen_instruction_body_after_prefix(has_F3, size);
+            if_blocks.push({ condition: "cpu.prefixes & ::prefix::PREFIX_F3 != 0", body, });
+        }
+
+        const else_block = {
+            body: gen_instruction_body_after_prefix(no_prefix, size),
+        };
+
+        return [].concat(
+            code,
+            {
+                type: "if-else",
+                if_blocks,
+                else_block,
+            }
+        );
+    }
+    else {
+        return [].concat(
+            code,
+            gen_instruction_body_after_prefix(encodings, size)
+        );
+    }
+}
+
+function gen_instruction_body_after_prefix(encodings, size)
+{
+    const encoding = encodings[0];
+
     if(encoding.fixed_g !== undefined)
     {
+        console.assert(encoding.e);
+
         // instruction with modrm byte where the middle 3 bits encode the instruction
 
         // group by opcode without prefix plus middle bits of modrm byte
@@ -175,158 +177,65 @@ function gen_instruction_body(encodings, size)
         cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
 
         return [
-            "int32_t modrm_byte = read_imm8();",
             {
                 type: "switch",
                 condition: "modrm_byte >> 3 & 7",
                 cases: cases.map(case_ => {
                     const fixed_g = case_.fixed_g;
-                    const instruction_postfix = create_instruction_postfix(case_);
-
-                    const mem_args = [];
-                    const reg_args = [];
-
-                    const imm_read = gen_read_imm_call(case_, size);
-
-                    if(imm_read)
-                    {
-                        mem_args.push(imm_read);
-                        reg_args.push(imm_read);
-                    }
-
-                    if(has_66 || has_F2 || has_F3)
-                    {
-                        const if_blocks = [];
-
-                        if(has_66) {
-                            const name = make_instruction_name(case_, size, 0x66);
-                            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
-                            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
-                        }
-                        if(has_F2) {
-                            const name = make_instruction_name(case_, size, 0xF2);
-                            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
-                            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
-                        }
-                        if(has_F3) {
-                            const name = make_instruction_name(case_, size, 0xF3);
-                            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
-                            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
-                        }
-
-                        const else_block = {
-                            body: [
-                                gen_modrm_mem_reg_split(
-                                    mem_args,
-                                    reg_args,
-                                    {}
-                                )
-                            ],
-                        };
-
-                        return {
-                            conditions: [fixed_g],
-                            body: [
-                                "int32_t prefixes_ = *prefixes;",
-                                {
-                                    type: "if-else",
-                                    if_blocks,
-                                    else_block,
-                                },
-                            ].concat(instruction_postfix),
-                        };
-                    }
-                    else
-                    {
-                        const body = [
-                            gen_modrm_mem_reg_split(
-                                mem_args,
-                                reg_args,
-                                get_nonfaulting_mem_reg_postfix(case_)
-                            )
-                        ].concat(instruction_postfix);
-
-                        return {
-                            conditions: [fixed_g],
-                            body,
-                        };
-                    }
+                    const body = gen_instruction_body_after_fixed_g(case_, size);
+
+                    return {
+                        conditions: [fixed_g],
+                        body,
+                    };
                 }),
 
                 default_case: {
                     body: [
-                        "analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;",
-                        "analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;",
+                        "analysis.ty = ::analysis::AnalysisType::BlockBoundary;",
+                        "analysis.no_next_instruction = true;",
                     ],
                 }
             },
         ];
     }
-    else if(has_66 || has_F2 || has_F3)
-    {
-        // instruction without modrm byte but with prefix
-
-        console.assert(encoding.e);
-        console.assert(!encoding.ignore_mod);
-
-        const instruction_postfix = create_instruction_postfix(encoding);
-
-        const imm_read = gen_read_imm_call(encoding, size);
+    else {
+        console.assert(encodings.length === 1);
+        return gen_instruction_body_after_fixed_g(encodings[0], size);
+    }
+}
 
-        const mem_args = [];
-        const reg_args = [];
+function gen_instruction_body_after_fixed_g(encoding, size)
+{
+    const imm_read = gen_read_imm_call(encoding, size);
+    const instruction_postfix = [];
 
-        if(imm_read)
-        {
-            mem_args.push(imm_read);
-            reg_args.push(imm_read);
-        }
+    if(encoding.block_boundary && !encoding.jump_offset_imm)
+    {
+        instruction_postfix.push("analysis.ty = ::analysis::AnalysisType::BlockBoundary;");
+    }
 
-        const if_blocks = [];
+    if(encoding.no_next_instruction)
+    {
+        instruction_postfix.push("analysis.no_next_instruction = true;");
+    }
 
-        if(has_66) {
-            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
-            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
-        }
-        if(has_F2) {
-            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
-            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
-        }
-        if(has_F3) {
-            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
-            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
-        }
+    if(encoding.prefix)
+    {
+        const instruction_name = "::analysis::" + make_instruction_name(encoding, size) + "_analyze";
+        const args = ["cpu", "analysis"];
 
-        const else_block = {
-            body: [
-                gen_modrm_mem_reg_split(
-                    mem_args,
-                    reg_args,
-                    {}
-                )
-            ],
-        };
+        console.assert(!imm_read);
 
-        return [
-            "int32_t modrm_byte = read_imm8();",
-            "int32_t prefixes_ = *prefixes;",
-            {
-                type: "if-else",
-                if_blocks,
-                else_block,
-            }
-        ].concat(instruction_postfix);
+        return [].concat(
+            gen_call(instruction_name, args),
+            instruction_postfix
+        );
     }
-    else if(encoding.fixed_g === undefined && encoding.e)
+    else if(encoding.e)
     {
         // instruction with modrm byte where the middle 3 bits encode a register
 
-        console.assert(encodings.length === 1);
-
-        const instruction_postfix = create_instruction_postfix(encoding);
-
-        const imm_read = gen_read_imm_call(encoding, size);
-
         if(encoding.ignore_mod)
         {
             console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
@@ -334,102 +243,69 @@ function gen_instruction_body(encodings, size)
             // Has modrm byte, but the 2 mod bits are ignored and both
             // operands are always registers (0f20-0f24)
 
-            if(encoding.nonfaulting)
-            {
-                instruction_postfix.push(APPEND_NONFAULTING_FLAG);
-            }
-
-            return ["int32_t modrm_byte = read_imm8();"]
-                .concat(gen_codegen_call([]))
-                .concat(instruction_postfix);
+            return instruction_postfix;
         }
         else
         {
-            const mem_args = [];
-            const reg_args = [];
-
-            if(imm_read)
-            {
-                mem_args.push(imm_read);
-                reg_args.push(imm_read);
-            }
-
-            return [
-                "int32_t modrm_byte = read_imm8();",
-                gen_modrm_mem_reg_split(
-                    mem_args,
-                    reg_args,
-                    get_nonfaulting_mem_reg_postfix(encoding)
-                ),
-            ].concat(instruction_postfix);
-        }
-    }
-    else if(encoding.prefix)
-    {
-        console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");
-
-        const instruction_postfix = create_instruction_postfix(encoding);
-
-        const instruction_name = make_instruction_name(encoding, size) + "_analyze";
-        const imm_read = gen_read_imm_call(encoding, size);
-        const args = [];
-
-        if(imm_read)
-        {
-            args.push(imm_read);
+            return [].concat(
+                {
+                    type: "if-else",
+                    if_blocks: [{
+                        condition: "modrm_byte < 0xC0",
+                        body: [
+                            gen_call("::analysis::modrm_analyze", ["cpu", "modrm_byte"])
+                        ],
+                    }],
+                },
+                imm_read ? [imm_read + ";"] : [],
+                instruction_postfix
+            );
         }
-
-        const call_prefix = encoding.prefix ? "return " : "";
-        // Prefix calls can add to the return flags
-        return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix);
     }
     else
     {
         // instruction without modrm byte or prefix
 
-        const instruction_postfix = create_instruction_postfix(encoding);
-
-        const imm_read = gen_read_imm_call(encoding, size);
-
-        const args = [];
+        const body = [];
 
         if(imm_read)
         {
             if(encoding.jump_offset_imm)
             {
-                args.push("int32_t jump_offset = " + imm_read + ";");
-                args.push("analysis.jump_offset = jump_offset;");
-                args.push("analysis.flags |= is_osize_32() ? JIT_INSTR_IMM_JUMP32_FLAG : JIT_INSTR_IMM_JUMP16_FLAG;");
+                body.push("let jump_offset = " + imm_read + ";");
+
+                if(encoding.conditional_jump)
+                {
+                    console.assert((encoding.opcode & ~0xF) === 0x70 || (encoding.opcode & ~0xF) === 0x0F80);
+                    const condition_index = encoding.opcode & 0xF;
+                    body.push(`analysis.ty = ::analysis::AnalysisType::Jump { offset: jump_offset as i32, condition: Some(${condition_index}), is_32: cpu.osize_32() };`);
+                }
+                else
+                {
+                    body.push(`analysis.ty = ::analysis::AnalysisType::Jump { offset: jump_offset as i32, condition: None, is_32: cpu.osize_32() };`);
+                }
             }
             else
             {
-                args.push(imm_read + ";");
+                body.push(imm_read + ";");
             }
         }
 
         if(encoding.extra_imm16)
         {
             console.assert(imm_read);
-            args.push(gen_call("read_imm16"));
+            body.push(gen_call("cpu.read_imm16"));
         }
         else if(encoding.extra_imm8)
         {
             console.assert(imm_read);
-            args.push(gen_call("read_imm8"));
-        }
-
-        if(encoding.nonfaulting)
-        {
-            instruction_postfix.push(APPEND_NONFAULTING_FLAG);
-        }
-
-        if(encoding.conditional_jump)
-        {
-            console.assert((encoding.opcode & ~0xF) === 0x70 || (encoding.opcode & ~0xF) === 0x0F80);
-            instruction_postfix.push("analysis.condition_index = " + (encoding.opcode & 0xF) + ";");
+            body.push(gen_call("cpu.read_imm8"));
         }
 
-        return args.concat(instruction_postfix);
+        return [].concat(
+            body,
+            instruction_postfix
+        );
     }
 }
 
@@ -438,7 +314,7 @@ function gen_table()
     let by_opcode = Object.create(null);
     let by_opcode0f = Object.create(null);
 
-    for(let o of encodings)
+    for(let o of x86_table)
     {
         let opcode = o.opcode;
 
@@ -465,6 +341,7 @@ function gen_table()
         console.assert(encoding && encoding.length);
 
         let opcode_hex = hex(opcode, 2);
+        let opcode_high_hex = hex(opcode | 0x100, 2);
 
         if(encoding[0].os)
         {
@@ -473,14 +350,14 @@ function gen_table()
                 body: gen_instruction_body(encoding, 16),
             });
             cases.push({
-                conditions: [`0x${opcode_hex}|0x100`],
+                conditions: [`0x${opcode_high_hex}`],
                 body: gen_instruction_body(encoding, 32),
             });
         }
         else
         {
             cases.push({
-                conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
+                conditions: [`0x${opcode_hex}`, `0x${opcode_high_hex}`],
                 body: gen_instruction_body(encoding, undefined),
             });
         }
@@ -490,16 +367,23 @@ function gen_table()
         condition: "opcode",
         cases,
         default_case: {
-            body: ["assert(false);"]
+            body: ["dbg_assert!(false);"]
         },
     };
 
     if(to_generate.analyzer)
     {
-        finalize_table(
+        const code = [
+            "#[cfg_attr(rustfmt, rustfmt_skip)]",
+            "pub fn analyzer(opcode: u32, cpu: &mut ::cpu_context::CpuContext, analysis: &mut ::analysis::Analysis) {",
+            table,
+            "}",
+        ];
+
+        finalize_table_rust(
             OUT_DIR,
-            "analyzer",
-            c_ast.print_syntax_tree([table]).join("\n") + "\n"
+            "analyzer.rs",
+            rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
         );
     }
 
@@ -540,7 +424,7 @@ function gen_table()
         condition: "opcode",
         cases: cases0f_16,
         default_case: {
-            body: ["assert(false);"]
+            body: ["dbg_assert!(false);"]
         },
     };
     const table0f_32 = {
@@ -548,25 +432,41 @@ function gen_table()
         condition: "opcode",
         cases: cases0f_32,
         default_case: {
-            body: ["assert(false);"]
+            body: ["dbg_assert!(false);"]
         },
     };
 
     if(to_generate.analyzer0f_16)
     {
-        finalize_table(
+        const code = [
+            "#![allow(unused)]",
+            "#[cfg_attr(rustfmt, rustfmt_skip)]",
+            "pub fn analyzer(opcode: u8, cpu: &mut ::cpu_context::CpuContext, analysis: &mut ::analysis::Analysis) {",
+            table0f_16,
+            "}"
+        ];
+
+        finalize_table_rust(
             OUT_DIR,
-            "analyzer0f_16",
-            c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n"
+            "analyzer0f_16.rs",
+            rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
         );
     }
 
     if(to_generate.analyzer0f_32)
     {
-        finalize_table(
+        const code = [
+            "#![allow(unused)]",
+            "#[cfg_attr(rustfmt, rustfmt_skip)]",
+            "pub fn analyzer(opcode: u8, cpu: &mut ::cpu_context::CpuContext, analysis: &mut ::analysis::Analysis) {",
+            table0f_32,
+            "}"
+        ];
+
+        finalize_table_rust(
             OUT_DIR,
-            "analyzer0f_32",
-            c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n"
+            "analyzer0f_32.rs",
+            rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
         );
     }
 }

+ 265 - 314
gen/generate_jit.js

@@ -3,14 +3,11 @@
 
 const fs = require("fs");
 const path = require("path");
-const encodings = require("./x86_table");
-const c_ast = require("./c_ast");
-const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
+const x86_table = require("./x86_table");
+const rust_ast = require("./rust_ast");
+const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
 
-const APPEND_NONFAULTING_FLAG = "instr_flags |= JIT_INSTR_NONFAULTING_FLAG;";
-
-const OUT_DIR = get_switch_value("--output-dir") ||
-          path.join(__dirname, "..", "build");
+const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");
 
 mkdirpSync(OUT_DIR);
 
@@ -37,18 +34,18 @@ function gen_read_imm_call(op, size_variant)
     {
         if(op.imm8)
         {
-            return "read_imm8()";
+            return "ctx.cpu.read_imm8()";
         }
         else if(op.imm8s)
         {
-            return "read_imm8s()";
+            return "ctx.cpu.read_imm8s()";
         }
         else
         {
             if(op.immaddr)
             {
                 // immaddr: depends on address size
-                return "read_moffs()";
+                return "ctx.cpu.read_moffs()";
             }
             else
             {
@@ -56,12 +53,12 @@ function gen_read_imm_call(op, size_variant)
 
                 if(op.imm1632 && size === 16 || op.imm16)
                 {
-                    return "read_imm16()";
+                    return "ctx.cpu.read_imm16()";
                 }
                 else
                 {
                     console.assert(op.imm1632 && size === 32 || op.imm32);
-                    return "read_imm32s()";
+                    return "ctx.cpu.read_imm32()";
                 }
             }
         }
@@ -80,49 +77,27 @@ function gen_call(name, args)
 
 function gen_codegen_call(name, args)
 {
-    args = args || [];
-    const args_count = args.length;
-    args = [].concat([`"${name}"`, name.length], args);
-    return gen_call(`gen_fn${args_count}_const`, args);
-}
+    const IMM_VAR_NAME = "imm";
+    let imm_read;
 
-function gen_codegen_call_modrm(name, args)
-{
-    args = (args || []).slice();
+    args = (args || []).map(arg => {
+        if(arg.includes("read_imm"))
+        {
+            imm_read = arg;
+            return IMM_VAR_NAME;
+        }
+        else
+        {
+            return arg;
+        }
+    });
     const args_count = args.length;
 
-    args = [].concat([`"${name}"`, name.length], args);
-
-    return [
-        gen_call(`gen_modrm_resolve`, ["modrm_byte"]),
-        gen_call(`gen_modrm_fn${args_count}`, args),
-    ].join(" ");
-}
-
-function gen_custom_jit_call(name, args)
-{
-    return gen_call(`${name}_jit`, args);
-}
-
-function gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, postfixes={})
-{
-    const { mem_call_fn, reg_call_fn } = gen_call_fns;
-    const { mem_postfix=[], reg_postfix=[] } = postfixes;
-
-    return {
-        type: "if-else",
-        if_blocks: [{
-            condition: "modrm_byte < 0xC0",
-            body: []
-                .concat([mem_call_fn(`${name}_mem`, mem_args)])
-                .concat(mem_postfix),
-        }],
-        else_block: {
-            body: [
-                reg_call_fn(`${name}_reg`, reg_args)
-            ].concat(reg_postfix),
-        },
-    };
+    args = [].concat(["ctx", `"${name}"`], args);
+    return [].concat(
+        imm_read ? `let ${IMM_VAR_NAME} = ${imm_read};` : [],
+        gen_call(`::codegen::gen_fn${args_count}_const`, args)
+    );
 }
 
 /*
@@ -130,69 +105,92 @@ function gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, postfix
  * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
  */
 
-function make_instruction_name(encoding, size, prefix_variant)
+function make_instruction_name(encoding, size)
 {
     const suffix = encoding.os ? String(size) : "";
     const opcode_hex = hex(encoding.opcode & 0xFF, 2);
     const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
-    const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
+    const prefix = (encoding.opcode & 0xFF0000) === 0 ? "" : hex(encoding.opcode >> 16 & 0xFF, 2);
     const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
 
     return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
 }
 
-function get_nonfaulting_mem_reg_postfix(encoding)
-{
-    const lea_special_case = encoding.opcode === 0x8D;
-    // In general only reg_postfixes will append the nonfaulting flag, except in the special case
-    // for LEA - it doesn't actually access memory, so the memory variant can be nonfaulting
-    const mem_postfix = (encoding.nonfaulting && lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
-    const reg_postfix = (encoding.nonfaulting && !lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
-
-    return {
-        mem_postfix,
-        reg_postfix,
-    };
-}
-
 function gen_instruction_body(encodings, size)
 {
     const encoding = encodings[0];
 
-    let has_66 = false;
-    let has_F2 = false;
-    let has_F3 = false;
+    let has_66 = [];
+    let has_F2 = [];
+    let has_F3 = [];
+    let no_prefix = [];
 
     for(let e of encodings)
     {
-        if((e.opcode >>> 16) === 0x66) has_66 = true;
-        if((e.opcode >>> 16) === 0xF2) has_F2 = true;
-        if((e.opcode >>> 16) === 0xF3) has_F3 = true;
+        if((e.opcode >>> 16) === 0x66) has_66.push(e);
+        else if((e.opcode >>> 16) === 0xF2) has_F2.push(e);
+        else if((e.opcode >>> 16) === 0xF3) has_F3.push(e);
+        else no_prefix.push(e);
     }
 
-    if(has_66 || has_F2 || has_F3)
+    if(has_66.length || has_F2.length || has_F3.length)
     {
         console.assert((encoding.opcode & 0xFF00) === 0x0F00);
-        // Leaving unsupported because:
-        // 1. Instructions that use these prefixes are usually faulting
-        // 2. It would need a refactor to allow us to pass the correct prefixed encoding object to
-        // where the nonfaulting flags are set
-        console.assert(
-            !encodings.some(e => e.nonfaulting),
-            "Unsupported: instruction with 66/f2/f3 prefix marked as nonfaulting. Opcode: 0x" + hex(encoding.opcode)
-        );
     }
 
-    const instruction_postfix = encoding.block_boundary ? ["instr_flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
+    const code = [];
 
-    // May be overridden for custom encodings
-    const gen_call_fns = {
-        mem_call_fn: gen_codegen_call_modrm,
-        reg_call_fn: gen_codegen_call,
-    };
+    if(encoding.e)
+    {
+        code.push("let modrm_byte = ctx.cpu.read_imm8();");
+    }
+
+    if(has_66.length || has_F2.length || has_F3.length)
+    {
+        const if_blocks = [];
+
+        if(has_66.length) {
+            const body = gen_instruction_body_after_prefix(has_66, size);
+            if_blocks.push({ condition: "ctx.cpu.prefixes & ::prefix::PREFIX_66 != 0", body, });
+        }
+        if(has_F2.length) {
+            const body = gen_instruction_body_after_prefix(has_F2, size);
+            if_blocks.push({ condition: "ctx.cpu.prefixes & ::prefix::PREFIX_F2 != 0", body, });
+        }
+        if(has_F3.length) {
+            const body = gen_instruction_body_after_prefix(has_F3, size);
+            if_blocks.push({ condition: "ctx.cpu.prefixes & ::prefix::PREFIX_F3 != 0", body, });
+        }
+
+        const else_block = {
+            body: gen_instruction_body_after_prefix(no_prefix, size),
+        };
+
+        return [].concat(
+            code,
+            {
+                type: "if-else",
+                if_blocks,
+                else_block,
+            }
+        );
+    }
+    else {
+        return [].concat(
+            code,
+            gen_instruction_body_after_prefix(encodings, size)
+        );
+    }
+}
+
+function gen_instruction_body_after_prefix(encodings, size)
+{
+    const encoding = encodings[0];
 
     if(encoding.fixed_g !== undefined)
     {
+        console.assert(encoding.e);
+
         // instruction with modrm byte where the middle 3 bits encode the instruction
 
         // group by opcode without prefix plus middle bits of modrm byte
@@ -204,171 +202,57 @@ function gen_instruction_body(encodings, size)
         cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
 
         return [
-            "int32_t modrm_byte = read_imm8();",
             {
                 type: "switch",
                 condition: "modrm_byte >> 3 & 7",
                 cases: cases.map(case_ => {
                     const fixed_g = case_.fixed_g;
-                    let instruction_name = make_instruction_name(case_, size, undefined);
-                    const instruction_postfix = case_.block_boundary ? ["instr_flags |=  JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
-
-                    const mem_args = [];
-                    const reg_args = ["modrm_byte & 7"];
-
-                    const imm_read = gen_read_imm_call(case_, size);
-                    if(imm_read)
-                    {
-                        mem_args.push(imm_read);
-                        reg_args.push(imm_read);
-                    }
-
-                    if(case_.custom)
-                    {
-                        console.assert(!case_.nonfaulting, "Unsupported: custom fixed_g instruction as nonfaulting");
-                        instruction_name += "_jit";
-                        mem_args.push("modrm_byte");
-                        gen_call_fns.mem_call_fn = gen_call;
-                        gen_call_fns.reg_call_fn = gen_call;
-                    }
-
-                    if(has_66 || has_F2 || has_F3)
-                    {
-                        const if_blocks = [];
-
-                        if(has_66) {
-                            const name = make_instruction_name(case_, size, 0x66);
-                            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
-                            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
-                        }
-                        if(has_F2) {
-                            const name = make_instruction_name(case_, size, 0xF2);
-                            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
-                            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
-                        }
-                        if(has_F3) {
-                            const name = make_instruction_name(case_, size, 0xF3);
-                            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
-                            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
-                        }
-
-                        const else_block = {
-                            body: [
-                                gen_modrm_mem_reg_split(
-                                    instruction_name,
-                                    gen_call_fns,
-                                    mem_args,
-                                    reg_args,
-                                    {}
-                                )
-                            ],
-                        };
-
-                        return {
-                            conditions: [fixed_g],
-                            body: [
-                                "int32_t prefixes_ = *prefixes;",
-                                {
-                                    type: "if-else",
-                                    if_blocks,
-                                    else_block,
-                                },
-                            ].concat(instruction_postfix),
-                        };
-                    }
-                    else
-                    {
-                        const body = [
-                            gen_modrm_mem_reg_split(
-                                instruction_name,
-                                gen_call_fns,
-                                mem_args,
-                                reg_args,
-                                get_nonfaulting_mem_reg_postfix(case_)
-                            )
-                        ].concat(instruction_postfix);
-
-                        return {
-                            conditions: [fixed_g],
-                            body,
-                        };
-                    }
+                    const body = gen_instruction_body_after_fixed_g(case_, size);
+
+                    return {
+                        conditions: [fixed_g],
+                        body,
+                    };
                 }),
 
                 default_case: {
-                    body: [
-                        gen_codegen_call("trigger_ud"),
-                    ],
+                    body: [].concat(
+                        gen_call(`::codegen::gen_fn0_const`, ["ctx", '"trigger_ud"'])
+                    ),
                 }
             },
-        ].concat(instruction_postfix);
+        ];
     }
-    else if(has_66 || has_F2 || has_F3)
-    {
-        // instruction without modrm byte but with prefix
-
-        console.assert(encoding.e);
-        console.assert(!encoding.ignore_mod);
-
-        const imm_read = gen_read_imm_call(encoding, size);
-
-        const mem_args = ["modrm_byte >> 3 & 7"];
-        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
-
-        if(imm_read)
-        {
-            mem_args.push(imm_read);
-            reg_args.push(imm_read);
-        }
+    else {
+        console.assert(encodings.length === 1);
+        return gen_instruction_body_after_fixed_g(encodings[0], size);
+    }
+}
 
-        const if_blocks = [];
+function gen_instruction_body_after_fixed_g(encoding, size)
+{
+    const instruction_postfix = [];
 
-        if(has_66) {
-            const name = make_instruction_name(encoding, size, 0x66);
-            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
-            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
-        }
-        if(has_F2) {
-            const name = make_instruction_name(encoding, size, 0xF2);
-            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
-            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
-        }
-        if(has_F3) {
-            const name = make_instruction_name(encoding, size, 0xF3);
-            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
-            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
-        }
+    if(encoding.block_boundary)
+    {
+        instruction_postfix.push("*instr_flags |= ::jit::JIT_INSTR_BLOCK_BOUNDARY_FLAG;");
+    }
 
-        const else_block = {
-            body: [
-                gen_modrm_mem_reg_split(
-                    make_instruction_name(encoding, size),
-                    gen_call_fns,
-                    mem_args,
-                    reg_args,
-                    {}
-                )
-            ],
-        };
+    const APPEND_NONFAULTING_FLAG = "*instr_flags |= ::jit::JIT_INSTR_NONFAULTING_FLAG;";
 
-        return [
-            "int32_t modrm_byte = read_imm8();",
-            "int32_t prefixes_ = *prefixes;",
-            {
-                type: "if-else",
-                if_blocks,
-                else_block,
-            }
-        ].concat(instruction_postfix);
-    }
-    else if(encoding.fixed_g === undefined && encoding.e)
+    const imm_read = gen_read_imm_call(encoding, size);
+    const imm_read_bindings = [];
+    if(imm_read)
     {
-        // instruction with modrm byte where the middle 3 bits encode a register
+        imm_read_bindings.push(`let imm = ${imm_read} as u32;`);
+    }
 
-        console.assert(encodings.length === 1);
+    const instruction_name = make_instruction_name(encoding, size);
 
-        const instruction_name = make_instruction_name(encoding, size);
-        const imm_read = gen_read_imm_call(encoding, size);
+    if(encoding.e)
+    {
+        const reg_postfix = encoding.nonfaulting ? [APPEND_NONFAULTING_FLAG] : [];
+        const mem_postfix = encoding.memory_nonfaulting ? [APPEND_NONFAULTING_FLAG] : [];
 
         if(encoding.ignore_mod)
         {
@@ -376,113 +260,158 @@ function gen_instruction_body(encodings, size)
 
             // Has modrm byte, but the 2 mod bits are ignored and both
             // operands are always registers (0f20-0f24)
+            const args = ["ctx", `"${instruction_name}"`, "(modrm_byte & 7) as u32", "(modrm_byte >> 3 & 7) as u32"];
+
+            return [].concat(
+                gen_call(`::codegen::gen_fn${args.length - 2}_const`, args),
+                reg_postfix,
+                instruction_postfix
+            );
+        }
+        else if(encoding.custom)
+        {
+            const mem_args = ["ctx", "modrm_byte"];
+            const reg_args = ["ctx", "(modrm_byte & 7) as u32"];
 
-            if(encoding.nonfaulting)
+            if(encoding.fixed_g === undefined)
             {
-                instruction_postfix.push(APPEND_NONFAULTING_FLAG);
+                mem_args.push("(modrm_byte >> 3 & 7) as u32");
+                reg_args.push("(modrm_byte >> 3 & 7) as u32");
             }
 
-            return [
-                "int32_t modrm_byte = read_imm8();",
-                gen_codegen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
-            ].concat(instruction_postfix);
-        }
-        else if(encoding.opcode === 0x8D) // lea
-        {
-            const mem_args = ["modrm_byte"];
-            const reg_args = ["0", "0"];
-            gen_call_fns.mem_call_fn = gen_custom_jit_call;
-            return [
-                "int32_t modrm_byte = read_imm8();",
-                gen_modrm_mem_reg_split(
-                    instruction_name,
-                    gen_call_fns,
-                    mem_args,
-                    reg_args,
-                    get_nonfaulting_mem_reg_postfix(encoding)
-                ),
-            ].concat(instruction_postfix);
+            if(imm_read)
+            {
+                mem_args.push("imm");
+                reg_args.push("imm");
+            }
+
+            return [].concat(
+                imm_read_bindings,
+                {
+                    type: "if-else",
+                    if_blocks: [{
+                        condition: "modrm_byte < 0xC0",
+                        body: [].concat(
+                            gen_call(`::jit_instructions::${instruction_name}_mem_jit`, mem_args),
+                            mem_postfix
+                        ),
+                    }],
+                    else_block: {
+                        body: [].concat(
+                            gen_call(`::jit_instructions::${instruction_name}_reg_jit`, reg_args),
+                            reg_postfix
+                        ),
+                    },
+                }
+            );
         }
         else
         {
-            const mem_args = ["modrm_byte >> 3 & 7"];
-            const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
+            const mem_args = ["ctx", `"${instruction_name}_mem"`];
+            const reg_args = ["ctx", `"${instruction_name}_reg"`, "(modrm_byte & 7) as u32"];
 
-            if(imm_read)
+            if(encoding.fixed_g === undefined)
             {
-                mem_args.push(imm_read);
-                reg_args.push(imm_read);
+                mem_args.push("(modrm_byte >> 3 & 7) as u32");
+                reg_args.push("(modrm_byte >> 3 & 7) as u32");
             }
 
-            if(encoding.custom)
+            if(imm_read)
             {
-                // The default mem_call_fn adds a modrm_resolve call, but since we override it,
-                // we also need to pass it in to our custom function to resolve it however it wishes
-                mem_args.unshift("modrm_byte");
-                gen_call_fns.mem_call_fn = gen_custom_jit_call;
-                gen_call_fns.reg_call_fn = gen_custom_jit_call;
+                mem_args.push("imm");
+                reg_args.push("imm");
             }
 
-            return [
-                "int32_t modrm_byte = read_imm8();",
-                gen_modrm_mem_reg_split(
-                    instruction_name,
-                    gen_call_fns,
-                    mem_args,
-                    reg_args,
-                    get_nonfaulting_mem_reg_postfix(encoding)
-                ),
-            ].concat(instruction_postfix);
+            return [].concat(
+                {
+                    type: "if-else",
+                    if_blocks: [{
+                        condition: "modrm_byte < 0xC0",
+                        body: [].concat(
+                            gen_call(`::codegen::gen_modrm_resolve`, ["ctx", "modrm_byte"]),
+                            imm_read_bindings,
+                            gen_call(`::codegen::gen_modrm_fn${mem_args.length - 2}`, mem_args),
+                            mem_postfix
+                        ),
+                    }],
+                    else_block: {
+                        body: [].concat(
+                            imm_read_bindings,
+                            gen_call(`::codegen::gen_fn${reg_args.length - 2}_const`, reg_args),
+                            reg_postfix
+                        ),
+                    },
+                },
+                instruction_postfix
+            );
         }
     }
     else if(encoding.prefix || encoding.custom)
     {
-        console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");
+        // custom, but not modrm
+
+        if(encoding.prefix)
+        {
+            console.assert(!encoding.nonfaulting, "Prefix instructions cannot be marked as nonfaulting.");
+        }
+
+        if(encoding.nonfaulting)
+        {
+            instruction_postfix.push(APPEND_NONFAULTING_FLAG);
+        }
 
-        const instruction_name = make_instruction_name(encoding, size) + "_jit";
-        const imm_read = gen_read_imm_call(encoding, size);
-        const args = [];
+        const args = ["ctx"];
 
         if(imm_read)
         {
-            args.push(imm_read);
+            args.push("imm");
+        }
+
+        if(encoding.prefix)
+        {
+            args.push("instr_flags");
         }
 
-        const call_prefix = encoding.prefix ? "instr_flags |= " : "";
-        // Prefix calls can add to the return flags
-        return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix);
+        return [].concat(
+            imm_read_bindings,
+            gen_call(`::jit_instructions::${instruction_name}_jit`, args),
+            instruction_postfix
+        );
     }
     else
     {
         // instruction without modrm byte or prefix
 
-        const imm_read = gen_read_imm_call(encoding, size);
-        const instruction_name = make_instruction_name(encoding, size);
+        if(encoding.nonfaulting)
+        {
+            instruction_postfix.push(APPEND_NONFAULTING_FLAG);
+        }
 
-        const args = [];
+        const args = ["ctx", `"${instruction_name}"`];
 
         if(imm_read)
         {
-            args.push(imm_read);
+            args.push("imm");
         }
 
         if(encoding.extra_imm16)
         {
             console.assert(imm_read);
-            args.push("read_imm16()");
+            imm_read_bindings.push(`let imm2 = ctx.cpu.read_imm16() as u32;`);
+            args.push("imm2");
         }
         else if(encoding.extra_imm8)
         {
             console.assert(imm_read);
-            args.push("read_imm8()");
+            imm_read_bindings.push(`let imm2 = ctx.cpu.read_imm8() as u32;`);
+            args.push("imm2");
         }
 
-        if(encoding.nonfaulting)
-        {
-            instruction_postfix.push(APPEND_NONFAULTING_FLAG);
-        }
-
-        return [gen_codegen_call(instruction_name, args)].concat(instruction_postfix);
+        return [].concat(
+            imm_read_bindings,
+            gen_call(`::codegen::gen_fn${args.length - 2}_const`, args),
+            instruction_postfix
+        );
     }
 }
 
@@ -491,7 +420,7 @@ function gen_table()
     let by_opcode = Object.create(null);
     let by_opcode0f = Object.create(null);
 
-    for(let o of encodings)
+    for(let o of x86_table)
     {
         let opcode = o.opcode;
 
@@ -518,6 +447,7 @@ function gen_table()
         console.assert(encoding && encoding.length);
 
         let opcode_hex = hex(opcode, 2);
+        let opcode_high_hex = hex(opcode | 0x100, 2);
 
         if(encoding[0].os)
         {
@@ -526,14 +456,14 @@ function gen_table()
                 body: gen_instruction_body(encoding, 16),
             });
             cases.push({
-                conditions: [`0x${opcode_hex}|0x100`],
+                conditions: [`0x${opcode_high_hex}`],
                 body: gen_instruction_body(encoding, 32),
             });
         }
         else
         {
             cases.push({
-                conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
+                conditions: [`0x${opcode_hex}`, `0x${opcode_high_hex}`],
                 body: gen_instruction_body(encoding, undefined),
             });
         }
@@ -543,16 +473,23 @@ function gen_table()
         condition: "opcode",
         cases,
         default_case: {
-            body: ["assert(false);"]
+            body: ["assert!(false);"]
         },
     };
 
     if(to_generate.jit)
     {
-        finalize_table(
+        const code = [
+            "#[cfg_attr(rustfmt, rustfmt_skip)]",
+            "pub fn jit(opcode: u32, ctx: &mut ::jit::JitContext, instr_flags: &mut u32) {",
+            table,
+            "}",
+        ];
+
+        finalize_table_rust(
             OUT_DIR,
-            "jit",
-            c_ast.print_syntax_tree([table]).join("\n") + "\n"
+            "jit.rs",
+            rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
         );
     }
 
@@ -593,7 +530,7 @@ function gen_table()
         condition: "opcode",
         cases: cases0f_16,
         default_case: {
-            body: ["assert(false);"]
+            body: ["assert!(false);"]
         },
     };
     const table0f_32 = {
@@ -601,25 +538,39 @@ function gen_table()
         condition: "opcode",
         cases: cases0f_32,
         default_case: {
-            body: ["assert(false);"]
+            body: ["assert!(false);"]
         },
     };
 
     if(to_generate.jit0f_16)
     {
-        finalize_table(
+        const code = [
+            "#[cfg_attr(rustfmt, rustfmt_skip)]",
+            "pub fn jit(opcode: u8, ctx: &mut ::jit::JitContext, instr_flags: &mut u32) {",
+            table0f_16,
+            "}",
+        ];
+
+        finalize_table_rust(
             OUT_DIR,
-            "jit0f_16",
-            c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n"
+            "jit0f_16.rs",
+            rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
         );
     }
 
     if(to_generate.jit0f_32)
     {
-        finalize_table(
+        const code = [
+            "#[cfg_attr(rustfmt, rustfmt_skip)]",
+            "pub fn jit(opcode: u8, ctx: &mut ::jit::JitContext, instr_flags: &mut u32) {",
+            table0f_32,
+            "}",
+        ];
+
+        finalize_table_rust(
             OUT_DIR,
-            "jit0f_32",
-            c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n"
+            "jit0f_32.rs",
+            rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
         );
     }
 }

+ 88 - 0
gen/rust_ast.js

@@ -0,0 +1,88 @@
+"use strict";
+
+function repeat(s, n)
+{
+    let out = "";
+    for(let i = 0; i < n; i++) out += s;
+    return out;
+}
+
+function indent(lines, how_much)
+{
+    return lines.map(line => repeat(" ", how_much) + line);
+}
+
+function print_syntax_tree(statements)
+{
+    let code = [];
+
+    for(let statement of statements)
+    {
+        if(typeof statement === "string")
+        {
+            code.push(statement);
+        }
+        else if(statement.type === "switch")
+        {
+            console.assert(statement.condition);
+
+            const cases = [];
+
+            for(let case_ of statement.cases)
+            {
+                console.assert(case_.conditions.length >= 1);
+
+                cases.push(case_.conditions.join(" | ") + " => {");
+                cases.push.apply(cases, indent(print_syntax_tree(case_.body), 4));
+                cases.push(`},`);
+            }
+
+            if(statement.default_case)
+            {
+                cases.push(`_ => {`);
+                cases.push.apply(cases, indent(print_syntax_tree(statement.default_case.body), 4));
+                cases.push(`}`);
+            }
+
+            code.push(`match ${statement.condition} {`);
+            code.push.apply(code, indent(cases, 4));
+            code.push(`}`);
+        }
+        else if(statement.type === "if-else")
+        {
+            console.assert(statement.if_blocks.length >= 1);
+
+            let first_if_block = statement.if_blocks[0];
+
+            code.push(`if ${first_if_block.condition} {`);
+            code.push.apply(code, indent(print_syntax_tree(first_if_block.body), 4));
+            code.push(`}`);
+
+            for(let i = 1; i < statement.if_blocks.length; i++)
+            {
+                let if_block = statement.if_blocks[i];
+
+                code.push(`else if ${if_block.condition} {`);
+                code.push.apply(code, indent(print_syntax_tree(if_block.body), 4));
+                code.push(`}`);
+            }
+
+            if(statement.else_block)
+            {
+                code.push(`else {`);
+                code.push.apply(code, indent(print_syntax_tree(statement.else_block.body), 4));
+                code.push(`}`);
+            }
+        }
+        else
+        {
+            console.assert(false, "Unexpected type: " + statement.type, "In:", statement);
+        }
+    }
+
+    return code;
+}
+
+module.exports = {
+    print_syntax_tree,
+};

+ 10 - 0
gen/util.js

@@ -74,10 +74,20 @@ function finalize_table(out_dir, name, contents)
     console.log(CYAN_FMT, `[+] Wrote table ${name}. Remember to check ${diff_file_path}`);
 }
 
+function finalize_table_rust(out_dir, name, contents)
+{
+    const file_path = path.join(out_dir, name);
+
+    fs.writeFileSync(file_path, contents);
+
+    console.log(CYAN_FMT, `[+] Wrote table ${name}.`);
+}
+
 module.exports = {
     hex,
     mkdirpSync,
     get_switch_value,
     get_switch_exist,
     finalize_table,
+    finalize_table_rust,
 };

+ 23 - 18
gen/x86_table.js

@@ -98,7 +98,7 @@ const encodings = [
     { opcode: 0x8B, custom: 1, nonfaulting: 1, os: 1, e: 1, },
 
     { opcode: 0x8C, os: 1, e: 1, skip: 1, },
-    { opcode: 0x8D, nonfaulting: 1, os: 1, e: 1, only_mem: 1, requires_prefix_call: 1, custom: 1, }, // lea
+    { opcode: 0x8D, memory_nonfaulting: 1, os: 1, e: 1, only_mem: 1, requires_prefix_call: 1, custom: 1, }, // lea
     { opcode: 0x8E, block_boundary: 1, e: 1, skip: 1, }, // mov sreg
     { opcode: 0x8F, os: 1, e: 1, fixed_g: 0, requires_prefix_call: 1, custom: 1, }, // pop r/m
 
@@ -196,23 +196,23 @@ const encodings = [
     { opcode: 0xF4, block_boundary: 1, no_next_instruction: 1, skip: 1, }, // hlt
     { opcode: 0xF5, nonfaulting: 1, },
 
-    { opcode: 0xF6, fixed_g: 0, nonfaulting: 1, imm8: 1, },
-    { opcode: 0xF6, fixed_g: 1, nonfaulting: 1, imm8: 1, },
-    { opcode: 0xF6, fixed_g: 2, nonfaulting: 1, },
-    { opcode: 0xF6, fixed_g: 3, nonfaulting: 1, },
-    { opcode: 0xF6, fixed_g: 4, nonfaulting: 1, mask_flags: af | zf, },
-    { opcode: 0xF6, fixed_g: 5, nonfaulting: 1, mask_flags: af | zf, },
-    { opcode: 0xF6, fixed_g: 6, },
-    { opcode: 0xF6, fixed_g: 7, },
-
-    { opcode: 0xF7, os: 1, fixed_g: 0, nonfaulting: 1, imm1632: 1, },
-    { opcode: 0xF7, os: 1, fixed_g: 1, nonfaulting: 1, imm1632: 1, },
-    { opcode: 0xF7, os: 1, fixed_g: 2, nonfaulting: 1, },
-    { opcode: 0xF7, os: 1, fixed_g: 3, nonfaulting: 1, },
-    { opcode: 0xF7, os: 1, fixed_g: 4, nonfaulting: 1, mask_flags: zf | af, },
-    { opcode: 0xF7, os: 1, fixed_g: 5, nonfaulting: 1, mask_flags: zf | af, },
-    { opcode: 0xF7, os: 1, fixed_g: 6, },
-    { opcode: 0xF7, os: 1, fixed_g: 7, },
+    { opcode: 0xF6, e: 1, fixed_g: 0, nonfaulting: 1, imm8: 1, },
+    { opcode: 0xF6, e: 1, fixed_g: 1, nonfaulting: 1, imm8: 1, },
+    { opcode: 0xF6, e: 1, fixed_g: 2, nonfaulting: 1, },
+    { opcode: 0xF6, e: 1, fixed_g: 3, nonfaulting: 1, },
+    { opcode: 0xF6, e: 1, fixed_g: 4, nonfaulting: 1, mask_flags: af | zf, },
+    { opcode: 0xF6, e: 1, fixed_g: 5, nonfaulting: 1, mask_flags: af | zf, },
+    { opcode: 0xF6, e: 1, fixed_g: 6, },
+    { opcode: 0xF6, e: 1, fixed_g: 7, },
+
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 0, nonfaulting: 1, imm1632: 1, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 1, nonfaulting: 1, imm1632: 1, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 2, nonfaulting: 1, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 3, nonfaulting: 1, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 4, nonfaulting: 1, mask_flags: zf | af, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 5, nonfaulting: 1, mask_flags: zf | af, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 6, },
+    { opcode: 0xF7, os: 1, e: 1, fixed_g: 7, },
 
     { opcode: 0xF8, nonfaulting: 1, },
     { opcode: 0xF9, nonfaulting: 1, },
@@ -408,6 +408,7 @@ const encodings = [
     { opcode: 0x0FB7, nonfaulting: 1, os: 1, e: 1, },
 
     { opcode: 0xF30FB8, os: 1, e: 1 }, // popcnt
+    { opcode: 0x0FB8, os: 1, e: 1, }, // ud
 
     { opcode: 0x0FB9, }, // ud2
 
@@ -519,7 +520,9 @@ const encodings = [
     { opcode: 0x660F6B, e: 1 },
     { opcode: 0x0F6B, e: 1 },
     { opcode: 0x660F6C, e: 1 },
+    { opcode: 0x0F6C, e: 1, }, // ud
     { opcode: 0x660F6D, e: 1 },
+    { opcode: 0x0F6D, e: 1, }, // ud
     { opcode: 0x660F6E, e: 1 },
     { opcode: 0x0F6E, e: 1 },
     { opcode: 0xF30F6F, e: 1 },
@@ -601,6 +604,7 @@ const encodings = [
     { opcode: 0x660FD6, e: 1 },
     { opcode: 0xF20FD6, only_reg: 1, e: 1 },
     { opcode: 0xF30FD6, only_reg: 1, e: 1 },
+    { opcode: 0x0FD6, e: 1, }, // ud
     { opcode: 0x0FD7, e: 1, only_reg: 1 },
     { opcode: 0x660FD7, e: 1, only_reg: 1, },
 
@@ -637,6 +641,7 @@ const encodings = [
     { opcode: 0x660FE6, e: 1, skip: 1, },
     { opcode: 0xF20FE6, e: 1, skip: 1, },
     { opcode: 0xF30FE6, e: 1, skip: 1, },
+    { opcode: 0x0FE6, e: 1, }, // ud
     { opcode: 0x0FE7, e: 1, only_mem: 1 },
     { opcode: 0x660FE7, e: 1, only_mem: 1, },
 

+ 4 - 1
src/browser/lib.js

@@ -113,10 +113,13 @@ var ASYNC_SAFE = false;
             WebAssembly.compile(buffer)
                 .then(module => {
                     const dylink = v86util.decode_dylink(module);
-                    const total_mem_pages = Math.ceil(
+                    let total_mem_pages = Math.ceil(
                         (dylink.memory_size + memory_size) / WASM_PAGE_SIZE
                     );
 
+                    // emscripten seems to require a minimum of 256 pages (16 MB)
+                    total_mem_pages = Math.max(256, total_mem_pages);
+
                     try
                     {
                         imports["env"]["memory"] = new WebAssembly.Memory({

+ 19 - 19
src/browser/print_stats.js

@@ -24,23 +24,23 @@ const print_stats = {
             "RUN_INTERPRETED",
             "RUN_INTERPRETED_PENDING",
             "RUN_INTERPRETED_NEAR_END_OF_PAGE",
-            "RUN_INTERPRETED_NOT_HOT",
+            "RUN_INTERPRETED_DIFFERENT_STATE",
             "RUN_INTERPRETED_STEPS",
             "RUN_FROM_CACHE",
             "RUN_FROM_CACHE_STEPS",
             "TRIGGER_CPU_EXCEPTION",
-            "S_SAFE_READ32_FAST",
-            "S_SAFE_READ32_SLOW_PAGE_CROSSED",
-            "S_SAFE_READ32_SLOW_NOT_VALID",
-            "S_SAFE_READ32_SLOW_NOT_USER",
-            "S_SAFE_READ32_SLOW_IN_MAPPED_RANGE",
-            "S_SAFE_WRITE32_FAST",
-            "S_SAFE_WRITE32_SLOW_PAGE_CROSSED",
-            "S_SAFE_WRITE32_SLOW_NOT_VALID",
-            "S_SAFE_WRITE32_SLOW_NOT_USER",
-            "S_SAFE_WRITE32_SLOW_IN_MAPPED_RANGE",
-            "S_SAFE_WRITE32_SLOW_READ_ONLY",
-            "S_SAFE_WRITE32_SLOW_HAS_CODE",
+            "SAFE_READ32_FAST",
+            "SAFE_READ32_SLOW_PAGE_CROSSED",
+            "SAFE_READ32_SLOW_NOT_VALID",
+            "SAFE_READ32_SLOW_NOT_USER",
+            "SAFE_READ32_SLOW_IN_MAPPED_RANGE",
+            "SAFE_WRITE32_FAST",
+            "SAFE_WRITE32_SLOW_PAGE_CROSSED",
+            "SAFE_WRITE32_SLOW_NOT_VALID",
+            "SAFE_WRITE32_SLOW_NOT_USER",
+            "SAFE_WRITE32_SLOW_IN_MAPPED_RANGE",
+            "SAFE_WRITE32_SLOW_READ_ONLY",
+            "SAFE_WRITE32_SLOW_HAS_CODE",
             "DO_RUN",
             "DO_MANY_CYCLES",
             "CYCLE_INTERNAL",
@@ -68,8 +68,8 @@ const print_stats = {
         text += "\n";
 
         text += "TLB_ENTRIES=" + cpu.wm.exports["_get_valid_tlb_entries_count"]() + "\n";
-        text += "CACHE_UNUSED=" + cpu.wm.exports["_jit_unused_cache_stat"]() + "\n";
-        text += "WASM_TABLE_FREE=" + cpu.wm.exports["_get_wasm_table_index_free_list_count"]() + "\n";
+        text += "CACHE_UNUSED=" + cpu.v86oxide.exports["jit_unused_cache_stat"]() + "\n";
+        text += "WASM_TABLE_FREE=" + cpu.v86oxide.exports["jit_get_wasm_table_index_free_list_count"]() + "\n";
 
         text += "do_many_cycles avg: " + do_many_cycles_total / do_many_cycles_count + "\n";
 
@@ -86,7 +86,7 @@ const print_stats = {
 
         for(let i = 0; i < JIT_CACHE_ARRAY_SIZE; i++)
         {
-            const address = cpu.wm.exports["_jit_get_entry_address"](i);
+            const address = cpu.v86oxide.exports["jit_get_entry_address"](i);
 
             if(address !== 0)
             {
@@ -117,10 +117,10 @@ const print_stats = {
         let pending_count = 0;
         const histogram = Object.create(null);
 
-        for(let i = 0; i < 0x10000; i++)
+        for(let i = 0; i < JIT_CACHE_ARRAY_SIZE; i++)
         {
-            const length = cpu.wm.exports["_jit_get_entry_length"](i);
-            pending_count += cpu.wm.exports["_jit_get_entry_pending"](i);
+            const length = cpu.v86oxide.exports["jit_get_entry_length"](i);
+            pending_count += cpu.v86oxide.exports["jit_get_entry_pending"](i);
             histogram[length] = (histogram[length] || 0) + 1;
         }
 

+ 26 - 60
src/browser/starter.js

@@ -136,7 +136,6 @@ function V86Starter(options)
         "_cpu_exception_hook": (n) => {
             return this["cpu_exception_hook"] && this["cpu_exception_hook"](n);
         },
-        "_jit_clear_func": function(index) { return cpu.jit_clear_func(index); },
         "_hlt_op": function() { return cpu.hlt_op(); },
         "abort": function() { dbg_assert(false); },
         "__dbg_trace": function() { return dbg_trace(); },
@@ -224,16 +223,9 @@ function V86Starter(options)
         },
         "_get_time": Date.now,
 
-        "_codegen_finalize": (wasm_table_index, start, end, first_opcode, state_flags) => {
-            cpu.codegen_finalize(wasm_table_index, start, end, first_opcode, state_flags);
-        },
         "_coverage_log": (fn_name_offset, num_blocks, visited_block) => {
             coverage_logger.log(fn_name_offset, num_blocks, visited_block);
         },
-        "_log_uncompiled_code": (start, end) => cpu.log_uncompiled_code(start, end),
-        "_dump_function_code": (blocks_ptr, count, end) => {
-            cpu.dump_function_code(blocks_ptr, count, end);
-        },
 
         // see https://github.com/kripken/emscripten/blob/incoming/src/library.js
         "_atan2": Math.atan2,
@@ -260,7 +252,7 @@ function V86Starter(options)
         "NaN": NaN,
     };
 
-    const v86oxide_mem = new WebAssembly.Memory({ "initial": 100 });
+    const v86oxide_mem = new WebAssembly.Memory({ "initial": 250 });
     const v86oxide_externs = {
         "memory": v86oxide_mem,
         "log_from_wasm": function(offset, len) {
@@ -270,6 +262,14 @@ function V86Starter(options)
         "abort": function() {
             dbg_assert(false);
         },
+
+        "read8": addr => cpu.read8(addr),
+        "read16": addr => cpu.read16(addr),
+        "read32": addr => cpu.read32s(addr),
+        "tlb_set_has_code": (page, has_code) => cpu.wm.exports["_tlb_set_has_code"](page, has_code),
+        "check_tlb_invariants": () => cpu.wm.exports["_check_tlb_invariants"](),
+        "codegen_finalize": (wasm_table_index, start, end, first_opcode, state_flags) => cpu.codegen_finalize(wasm_table_index, start, end, first_opcode, state_flags),
+        "profiler_stat_increment": (name) => cpu.wm.exports["_profiler_stat_increment"](name),
     };
 
     let wasm_file = DEBUG ? "v86-debug.wasm" : "v86.wasm";
@@ -287,56 +287,22 @@ function V86Starter(options)
     }
 
     const v86oxide_exports = [
-        "wg_get_code_section",
-        "wg_get_instruction_body",
-        "wg_commit_instruction_body_to_cs",
-        "wg_finish",
-        "wg_reset",
-        "wg_get_fn_idx",
-
-        "wg_push_i32",
-        "wg_push_u32",
-        "wg_load_aligned_u16",
-        "wg_load_aligned_i32",
-        "wg_store_aligned_u16",
-        "wg_store_aligned_i32",
-        "wg_add_i32",
-        "wg_and_i32",
-        "wg_or_i32",
-        "wg_shl_i32",
-        "wg_call_fn",
-        "wg_call_fn_with_arg",
-        "wg_eq_i32",
-        "wg_ne_i32",
-        "wg_le_i32",
-        "wg_lt_i32",
-        "wg_ge_i32",
-        "wg_gt_i32",
-        "wg_if_i32",
-        "wg_block_i32",
-        "wg_tee_local",
-        "wg_xor_i32",
-        "wg_load_unaligned_i32_from_stack",
-        "wg_load_aligned_i32_from_stack",
-        "wg_store_unaligned_i32",
-        "wg_shr_u32",
-        "wg_shr_i32",
-        "wg_eqz_i32",
-        "wg_if_void",
-        "wg_else",
-        "wg_loop_void",
-        "wg_block_void",
-        "wg_block_end",
-        "wg_return",
-        "wg_drop",
-        "wg_brtable_and_cases",
-        "wg_br",
-        "wg_get_local",
-        "wg_set_local",
-        "wg_unreachable",
-        "wg_increment_mem32",
-        "wg_increment_variable",
-        "wg_load_aligned_u16_from_stack",
+        // For C:
+        "jit_get_entry_pending",
+        "jit_get_entry_address",
+        "jit_get_entry_length",
+        "jit_unused_cache_stat",
+        "jit_dirty_cache_single",
+        "jit_dirty_cache_small",
+        "jit_page_has_code",
+        "jit_increase_hotness_and_maybe_compile",
+        "jit_find_cache_entry",
+
+        // For JS:
+        "jit_empty_cache",
+        "codegen_finalize_finished",
+        "rust_setup",
+        "jit_dirty_cache",
     ];
 
     v86util.minimal_load_wasm(v86oxide_bin, { "env": v86oxide_externs }, (v86oxide) => {
@@ -345,7 +311,7 @@ function V86Starter(options)
             dbg_assert(typeof v86oxide.exports[fn_name] === "function", `Function ${fn_name} not found in v86oxide exports`);
             wasm_shared_funcs[`_${fn_name}`] = v86oxide.exports[fn_name];
         }
-        v86oxide.exports["wg_setup"]();
+        v86oxide.exports["rust_setup"]();
 
     //XXX: fix indentation break
 

+ 1 - 1
src/config.js

@@ -12,7 +12,7 @@ var DEBUG = true;
 /** @const
  * Also needs to be set in config.h
  */
-var ENABLE_PROFILER = true;
+var ENABLE_PROFILER = false;
 
 /** @const */
 var LOG_TO_FILE = false;

+ 1 - 1
src/const.js

@@ -367,7 +367,7 @@ const WASM_EXPORT_TABLE_NAME = "table";
 
 /** @const */
 // The space we need for misc internal state before the beginning of mem8; see global_pointers.h
-const GUEST_MEMORY_START = 0x400000 + 0x100000 * 8;
+const GUEST_MEMORY_START = 0x800000;
 
 /** @const */
 const WASM_PAGE_SIZE = 64 * 1024;

+ 29 - 16
src/cpu.js

@@ -208,7 +208,7 @@ function CPU(bus, wm, v86oxide, coverage_logger)
 
     this.update_operand_size();
 
-    wm.exports["_set_tsc"](0, 0);
+    this.set_tsc(0, 0);
 
     this.debug_init();
 
@@ -217,8 +217,8 @@ function CPU(bus, wm, v86oxide, coverage_logger)
 
 CPU.prototype.wasmgen_get_module_code = function()
 {
-    const ptr = this.v86oxide.exports["wg_get_op_ptr"]();
-    const len = this.v86oxide.exports["wg_get_op_len"]();
+    const ptr = this.jit_get_op_ptr();
+    const len = this.jit_get_op_len();
 
     const output_buffer_view = new Uint8Array(this.v86oxide.memory.buffer, ptr, len);
     return output_buffer_view;
@@ -300,7 +300,25 @@ CPU.prototype.wasm_patch = function(wm)
     this.clear_tlb = this.wm.exports["_clear_tlb"];
     this.full_clear_tlb = this.wm.exports["_full_clear_tlb"];
 
-    this.jit_force_generate_unsafe = this.wm.exports["_jit_force_generate_unsafe"];
+    this.set_tsc = this.wm.exports["_set_tsc"];
+    this.store_current_tsc = this.wm.exports["_store_current_tsc"];
+
+    this.pack_current_state_flags = this.wm.exports["_pack_current_state_flags"];
+
+    this.jit_force_generate_unsafe = this.v86oxide.exports["jit_force_generate_unsafe"];
+    this.jit_empty_cache = this.v86oxide.exports["jit_empty_cache"];
+    this.jit_dirty_cache = this.v86oxide.exports["jit_dirty_cache"];
+    this.codegen_finalize_finished = this.v86oxide.exports["codegen_finalize_finished"];
+
+    this.jit_get_op_ptr = this.v86oxide.exports["jit_get_op_ptr"];
+    this.jit_get_op_len = this.v86oxide.exports["jit_get_op_len"];
+};
+
+CPU.prototype.jit_force_generate = function(addr)
+{
+    const cs_offset = this.get_seg(reg_cs);
+    const state_flags = this.pack_current_state_flags();
+    this.jit_force_generate_unsafe(addr, cs_offset, state_flags);
 };
 
 CPU.prototype.jit_clear_func = function(index)
@@ -353,7 +371,7 @@ CPU.prototype.get_state = function()
     state[41] = this.dreg;
     state[42] = this.mem8;
 
-    this.wm.exports["_store_current_tsc"]();
+    this.store_current_tsc();
     state[43] = this.current_tsc;
 
     state[45] = this.devices.virtio_9p;
@@ -440,7 +458,7 @@ CPU.prototype.set_state = function(state)
     this.dreg.set(state[41]);
     this.mem8.set(state[42]);
 
-    this.wm.exports["_set_tsc"](state[43][0], state[43][1]);
+    this.set_tsc(state[43][0], state[43][1]);
 
     this.devices.virtio_9p = state[45];
     this.devices.apic = state[46];
@@ -616,7 +634,7 @@ CPU.prototype.reset = function()
     this.last_op2.fill(0);
     this.last_op_size.fill(0);
 
-    this.wm.exports["_set_tsc"](0, 0);
+    this.set_tsc(0, 0);
 
     this.instruction_pointer[0] = 0xFFFF0;
     this.switch_cs_real_mode(0xF000);
@@ -631,7 +649,7 @@ CPU.prototype.reset = function()
 
     this.fw_value[0] = 0;
 
-    this.jit_empty_cache();
+    this.jit_clear_cache();
 };
 
 CPU.prototype.reset_memory = function()
@@ -1261,7 +1279,7 @@ CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_op
     const result = WebAssembly.instantiate(code, { "e": jit_imports }).then(result => {
         const f = result.instance.exports["f"];
 
-        this.wm.exports["_codegen_finalize_finished"](
+        this.codegen_finalize_finished(
             wasm_table_index, start, end,
             first_opcode, state_flags);
 
@@ -1434,8 +1452,6 @@ CPU.prototype.set_cr0 = function(cr0)
     }
 
     this.protected_mode[0] = +((this.cr[0] & CR0_PE) === CR0_PE);
-
-    //this.jit_empty_cache();
 };
 
 CPU.prototype.set_cr4 = function(cr4)
@@ -1484,9 +1500,9 @@ CPU.prototype.cpl_changed = function()
     this.last_virt_esp[0] = -1;
 };
 
-CPU.prototype.jit_empty_cache = function()
+CPU.prototype.jit_clear_cache = function()
 {
-    this.wm.exports["_jit_empty_cache"]();
+    this.jit_empty_cache();
 
     const table = this.wm.imports["env"][WASM_EXPORT_TABLE_NAME];
 
@@ -3145,9 +3161,6 @@ CPU.prototype.update_cs_size = function(new_size)
 
     if(Boolean(this.is_32[0]) !== new_size)
     {
-        //dbg_log("clear instruction cache", LOG_CPU);
-        //this.jit_empty_cache();
-
         this.is_32[0] = +new_size;
         this.update_operand_size();
     }

+ 1 - 1
src/memory.js

@@ -63,7 +63,7 @@ CPU.prototype.write_blob = function(blob, offset)
     dbg_assert(!this.in_mapped_range(offset));
     dbg_assert(!this.in_mapped_range(offset + blob.length));
 
-    this.wm.exports["_jit_dirty_cache"](offset, offset + blob.length);
+    this.jit_dirty_cache(offset, offset + blob.length);
     this.mem8.set(blob, offset);
 };
 

+ 0 - 678
src/native/codegen/codegen.c

@@ -1,678 +0,0 @@
-#include <assert.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "../const.h"
-#include "../cpu.h"
-#include "../global_pointers.h"
-#include "../log.h"
-#include "codegen.h"
-#include "wasmgen.h"
-
-static void jit_add_seg_offset(int32_t default_segment);
-static void jit_resolve_modrm32_(int32_t modrm_byte);
-static void jit_resolve_modrm16_(int32_t modrm_byte);
-PackedStr pack_str(char const* fn_name, uint8_t fn_len);
-
-void gen_reset(void)
-{
-    wg_reset();
-    cs = wg_get_code_section();
-    instruction_body = wg_get_instruction_body();
-    add_get_seg_import();
-}
-
-void add_get_seg_import(void)
-{
-    uint16_t _fn_get_seg_idx = get_fn_idx("get_seg", 7, FN1_RET_TYPE_INDEX);
-    assert(_fn_get_seg_idx == fn_get_seg_idx);
-    UNUSED(_fn_get_seg_idx);
-}
-
-PackedStr pack_str(char const* fn_name, uint8_t fn_len)
-{
-    assert(fn_len <= 24);
-
-    union {
-        PackedStr pstr;
-        uint8_t u8s[24];
-    } ret = { { 0, 0, 0 } };
-
-    for(int i = 0; i < fn_len; i++)
-    {
-        ret.u8s[i] = fn_name[i];
-    }
-    return ret.pstr;
-}
-
-uint16_t get_fn_idx(char const* fn, uint8_t fn_len, uint8_t fn_type)
-{
-    PackedStr pstr = pack_str(fn, fn_len);
-    return wg_get_fn_idx(pstr.a, pstr.b, pstr.c, fn_type);
-}
-
-void gen_increment_mem32(int32_t addr)
-{
-    wg_increment_mem32(cs, addr);
-}
-
-void gen_increment_variable(int32_t variable_address, int32_t n)
-{
-    wg_increment_variable(cs, variable_address, n);
-}
-
-void gen_increment_instruction_pointer(int32_t n)
-{
-    wg_push_i32(cs, (int32_t)instruction_pointer); // store address of ip
-
-    wg_load_aligned_i32(cs, (int32_t)instruction_pointer); // load ip
-
-    wg_push_i32(cs, n);
-
-    wg_add_i32(cs);
-    wg_store_aligned_i32(cs); // store it back in
-}
-
-void gen_relative_jump(int32_t n)
-{
-    // add n to instruction_pointer (without setting the offset as above)
-    wg_push_i32(instruction_body, (int32_t)instruction_pointer);
-    wg_load_aligned_i32(instruction_body, (int32_t)instruction_pointer);
-    wg_push_i32(instruction_body, n);
-    wg_add_i32(instruction_body);
-    wg_store_aligned_i32(instruction_body);
-}
-
-void gen_increment_timestamp_counter(uint32_t n)
-{
-    gen_increment_variable((int32_t)timestamp_counter, n);
-}
-
-void gen_set_previous_eip_offset_from_eip(int32_t n)
-{
-    wg_push_i32(cs, (int32_t)previous_ip); // store address of previous ip
-    wg_load_aligned_i32(cs, (int32_t)instruction_pointer); // load ip
-    if(n != 0)
-    {
-        wg_push_i32(cs, n);
-        wg_add_i32(cs); // add constant to ip value
-    }
-    wg_store_aligned_i32(cs); // store it as previous ip
-}
-
-void gen_set_previous_eip(void)
-{
-    wg_push_i32(cs, (int32_t)previous_ip); // store address of previous ip
-    wg_load_aligned_i32(cs, (int32_t)instruction_pointer); // load ip
-    wg_store_aligned_i32(cs); // store it as previous ip
-}
-
-void gen_clear_prefixes(void)
-{
-    wg_push_i32(instruction_body, (int32_t)prefixes); // load address of prefixes
-    wg_push_i32(instruction_body, 0);
-    wg_store_aligned_i32(instruction_body);
-}
-
-void gen_add_prefix_bits(int32_t mask)
-{
-    assert(mask >= 0 && mask < 0x100);
-
-    wg_push_i32(instruction_body, (int32_t)prefixes); // load address of prefixes
-
-    wg_load_aligned_i32(instruction_body, (int32_t)prefixes); // load old value
-    wg_push_i32(instruction_body, mask);
-    wg_or_i32(instruction_body);
-
-    wg_store_aligned_i32(instruction_body);
-}
-
-void gen_fn0_const_ret(char const* fn, uint8_t fn_len)
-{
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN0_RET_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_fn0_const(char const* fn, uint8_t fn_len)
-{
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN0_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_set_reg16_fn0(char const* fn, uint8_t fn_len, int32_t reg)
-{
-    // generates: reg16[reg] = fn()
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN0_RET_TYPE_INDEX);
-    wg_push_i32(instruction_body, (int32_t) &reg16[reg]);
-    wg_call_fn(instruction_body, fn_idx);
-    wg_store_aligned_u16(instruction_body);
-}
-
-void gen_set_reg32s_fn0(char const* fn, uint8_t fn_len, int32_t reg)
-{
-    // generates: reg32s[reg] = fn()
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN0_RET_TYPE_INDEX);
-    wg_push_i32(instruction_body, (int32_t) &reg32s[reg]);
-    wg_call_fn(instruction_body, fn_idx);
-    wg_store_aligned_i32(instruction_body);
-}
-
-void gen_fn1_const_ret(char const* fn, uint8_t fn_len, int32_t arg0)
-{
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_RET_TYPE_INDEX);
-    wg_push_i32(instruction_body, arg0);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_call_fn1_ret(char const* fn, uint8_t fn_len)
-{
-    // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_RET_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_call_fn1(char const* fn, uint8_t fn_len)
-{
-    // generates: fn( _ ) where _ must be left on the stack before calling this
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_fn1_const(char const* fn, uint8_t fn_len, int32_t arg0)
-{
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_TYPE_INDEX);
-    wg_push_i32(instruction_body, arg0);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_set_reg16_r(int32_t r_dest, int32_t r_src)
-{
-    // generates: reg16[r_dest] = reg16[r_src]
-    wg_push_i32(instruction_body, (int32_t) &reg16[r_dest]);
-    wg_load_aligned_u16(instruction_body, (int32_t) &reg16[r_src]);
-    wg_store_aligned_u16(instruction_body);
-}
-
-void gen_set_reg32_r(int32_t r_dest, int32_t r_src)
-{
-    // generates: reg32s[r_dest] = reg32s[r_src]
-    wg_push_i32(instruction_body, (int32_t) &reg32s[r_dest]);
-    wg_load_aligned_i32(instruction_body, (int32_t) &reg32s[r_src]);
-    wg_store_aligned_i32(instruction_body);
-}
-
-void gen_fn1_reg16(char const* fn, uint8_t fn_len, int32_t reg)
-{
-    // generates: fn(reg16[reg])
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_TYPE_INDEX);
-    wg_load_aligned_u16(instruction_body, (int32_t) &reg16[reg]);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_fn1_reg32s(char const* fn, uint8_t fn_len, int32_t reg)
-{
-    // generates: fn(reg32s[reg])
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_TYPE_INDEX);
-    wg_load_aligned_i32(instruction_body, (int32_t) &reg32s[reg]);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-
-void gen_call_fn2(char const* fn, uint8_t fn_len)
-{
-    // generates: fn( _, _ ) where _ must be left on the stack before calling this
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN2_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_fn2_const(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1)
-{
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN2_TYPE_INDEX);
-    wg_push_i32(instruction_body, arg0);
-    wg_push_i32(instruction_body, arg1);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_fn3_const(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1, int32_t arg2)
-{
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN3_TYPE_INDEX);
-    wg_push_i32(instruction_body, arg0);
-    wg_push_i32(instruction_body, arg1);
-    wg_push_i32(instruction_body, arg2);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_safe_read32(void)
-{
-    // Assumes virtual address has been pushed to the stack, and generates safe_read32s' fast-path
-    // inline, bailing to safe_read32s_slow if necessary
-
-    const int32_t address_local = GEN_LOCAL_SCRATCH0;
-    wg_tee_local(instruction_body, address_local);
-
-    // Pseudo: base_on_stack = (uint32_t)address >> 12;
-    wg_push_i32(instruction_body, 12);
-    wg_shr_u32(instruction_body);
-    SCALE_INDEX_FOR_ARRAY32(tlb_data);
-
-    // Pseudo: entry = tlb_data[base_on_stack];
-    const int32_t entry_local = GEN_LOCAL_SCRATCH1;
-    wg_load_aligned_i32_from_stack(instruction_body, (uint32_t) tlb_data);
-    wg_tee_local(instruction_body, entry_local);
-
-    // Pseudo: bool can_use_fast_path = (entry & 0xFFF & ~TLB_READONLY & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER) == TLB_VALID &&
-    //                                   (address & 0xFFF) <= (0x1000 - 4));
-    wg_push_i32(instruction_body, 0xFFF & ~TLB_READONLY & ~TLB_GLOBAL & ~(*cpl == 3 ? 0 : TLB_NO_USER));
-    wg_and_i32(instruction_body);
-
-    wg_push_i32(instruction_body, TLB_VALID);
-    wg_eq_i32(instruction_body);
-
-    wg_get_local(instruction_body, address_local);
-    wg_push_i32(instruction_body, 0xFFF);
-    wg_and_i32(instruction_body);
-    wg_push_i32(instruction_body, 0x1000 - 4);
-    wg_le_i32(instruction_body);
-
-    wg_and_i32(instruction_body);
-
-    // Pseudo:
-    // if(can_use_fast_path) leave_on_stack(mem8[entry & ~0xFFF ^ address]);
-    wg_if_i32(instruction_body);
-    wg_get_local(instruction_body, entry_local);
-    wg_push_i32(instruction_body, ~0xFFF);
-    wg_and_i32(instruction_body);
-    wg_get_local(instruction_body, address_local);
-    wg_xor_i32(instruction_body);
-
-    wg_load_unaligned_i32_from_stack(instruction_body, (uint32_t) mem8);
-
-    // Pseudo:
-    // else { leave_on_stack(safe_read32s_slow(address)); }
-    wg_else(instruction_body);
-    wg_get_local(instruction_body, address_local);
-    gen_call_fn1_ret("safe_read32s_slow", 17);
-    wg_block_end(instruction_body);
-}
-
-void gen_safe_write32(int32_t local_for_address, int32_t local_for_value)
-{
-    // Generates safe_write32' fast-path inline, bailing to safe_write32_slow if necessary.
-
-    // local_for_{address,value} are the numbers of the local variables which contain the virtual
-    // address and value for safe_write32
-    // Usage:
-    // set_local(0, value);
-    // set_local(1, v_addr);
-    // gen_safe_write32();
-
-    // Since this function clobbers other variables, we confirm that the caller uses the local
-    // variables we expect them to
-    assert(local_for_address == GEN_LOCAL_SCRATCH0);
-    assert(local_for_value == GEN_LOCAL_SCRATCH1);
-
-    wg_get_local(instruction_body, local_for_address);
-
-    // Pseudo: base_on_stack = (uint32_t)address >> 12;
-    wg_push_i32(instruction_body, 12);
-    wg_shr_u32(instruction_body);
-    SCALE_INDEX_FOR_ARRAY32(tlb_data);
-
-    // entry_local is only used in the following block, so the scratch variable can be reused later
-    {
-        // Pseudo: entry = tlb_data[base_on_stack];
-        const int32_t entry_local = GEN_LOCAL_SCRATCH2;
-        wg_load_aligned_i32_from_stack(instruction_body, (uint32_t) tlb_data);
-        wg_tee_local(instruction_body, entry_local);
-
-        // Pseudo: bool can_use_fast_path = (entry & 0xFFF & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER) == TLB_VALID &&
-        //                                   (address & 0xFFF) <= (0x1000 - 4));
-        wg_push_i32(instruction_body, 0xFFF & ~TLB_GLOBAL & ~(*cpl == 3 ? 0 : TLB_NO_USER));
-        wg_and_i32(instruction_body);
-
-        wg_push_i32(instruction_body, TLB_VALID);
-        wg_eq_i32(instruction_body);
-
-        wg_get_local(instruction_body, local_for_address);
-        wg_push_i32(instruction_body, 0xFFF);
-        wg_and_i32(instruction_body);
-        wg_push_i32(instruction_body, 0x1000 - 4);
-        wg_le_i32(instruction_body);
-
-        wg_and_i32(instruction_body);
-
-        // Pseudo:
-        // if(can_use_fast_path)
-        // {
-        //     phys_addr = entry & ~0xFFF ^ address;
-        wg_if_void(instruction_body);
-
-        wg_get_local(instruction_body, entry_local);
-        wg_push_i32(instruction_body, ~0xFFF);
-        wg_and_i32(instruction_body);
-        wg_get_local(instruction_body, local_for_address);
-        wg_xor_i32(instruction_body);
-    }
-
-    // entry_local isn't needed anymore, so we overwrite it
-    const int32_t phys_addr_local = GEN_LOCAL_SCRATCH2;
-    // Pseudo:
-    //     /* continued within can_use_fast_path branch */
-    //     mem8[phys_addr] = value;
-
-    wg_tee_local(instruction_body, phys_addr_local);
-    wg_get_local(instruction_body, local_for_value);
-    wg_store_unaligned_i32(instruction_body, (uint32_t) mem8);
-
-    // Pseudo:
-    // else { safe_read32_slow(address, value); }
-    wg_else(instruction_body);
-    wg_get_local(instruction_body, local_for_address);
-    wg_get_local(instruction_body, local_for_value);
-    gen_call_fn2("safe_write32_slow", 17);
-    wg_block_end(instruction_body);
-}
-
-#define MODRM_ENTRY(n, work)\
-    case (n) | 0 << 3:\
-    case (n) | 1 << 3:\
-    case (n) | 2 << 3:\
-    case (n) | 3 << 3:\
-    case (n) | 4 << 3:\
-    case (n) | 5 << 3:\
-    case (n) | 6 << 3:\
-    case (n) | 7 << 3:\
-        (work); break;
-
-#define MODRM_ENTRY16_0(row, seg, reg1, reg2)\
-    MODRM_ENTRY(0x00 | (row), gen_modrm_entry_0((seg), (reg1), (reg2), 0))\
-    MODRM_ENTRY(0x40 | (row), gen_modrm_entry_0((seg), (reg1), (reg2), read_imm8s()))\
-    MODRM_ENTRY(0x80 | (row), gen_modrm_entry_0((seg), (reg1), (reg2), read_imm16()))
-
-#define MODRM_ENTRY16_1(row, seg, reg)\
-    MODRM_ENTRY(0x00 | (row), gen_modrm_entry_1(seg, reg, 0))\
-    MODRM_ENTRY(0x40 | (row), gen_modrm_entry_1(seg, reg, read_imm8s()))\
-    MODRM_ENTRY(0x80 | (row), gen_modrm_entry_1(seg, reg, read_imm16()))
-
-static void inline gen_modrm_entry_0(int32_t segment, int32_t reg16_idx_1, int32_t reg16_idx_2, int32_t imm)
-{
-    // generates: fn( ( reg1 + reg2 + imm ) & 0xFFFF )
-    wg_load_aligned_u16(instruction_body, reg16_idx_1);
-    wg_load_aligned_u16(instruction_body, reg16_idx_2);
-    wg_add_i32(instruction_body);
-
-    if(imm)
-    {
-        wg_push_i32(instruction_body, imm);
-        wg_add_i32(instruction_body);
-    }
-
-    wg_push_i32(instruction_body, 0xFFFF);
-    wg_and_i32(instruction_body);
-
-    jit_add_seg_offset(segment);
-}
-
-static void gen_modrm_entry_1(int32_t segment, int32_t reg16_idx, int32_t imm)
-{
-    // generates: fn ( ( reg + imm ) & 0xFFFF )
-    wg_load_aligned_u16(instruction_body, reg16_idx);
-
-    if(imm)
-    {
-        wg_push_i32(instruction_body, imm);
-        wg_add_i32(instruction_body);
-    }
-
-    wg_push_i32(instruction_body, 0xFFFF);
-    wg_and_i32(instruction_body);
-
-    jit_add_seg_offset(segment);
-}
-
-static bool can_optimize_get_seg(int32_t segment)
-{
-    return (segment == DS || segment == SS) && has_flat_segmentation();
-}
-
-/*
- * Note: Requires an existing value to be on the WASM stack! Based on optimization possibilities,
- * the value will be consumed and added to get_seg(segment), or it'll be left as-is
- */
-static void jit_add_seg_offset(int32_t default_segment)
-{
-    int32_t prefix = *prefixes & PREFIX_MASK_SEGMENT;
-    int32_t seg = prefix ? prefix - 1 : default_segment;
-
-    if(can_optimize_get_seg(seg) || prefix == SEG_PREFIX_ZERO)
-    {
-        return;
-    }
-
-    wg_push_i32(instruction_body, seg);
-    wg_call_fn(instruction_body, fn_get_seg_idx);
-    wg_add_i32(instruction_body);
-}
-
-static void gen_modrm_entry_2()
-{
-    wg_push_i32(instruction_body, read_imm16());
-    jit_add_seg_offset(DS);
-}
-
-static void jit_resolve_modrm16_(int32_t modrm_byte)
-{
-    switch(modrm_byte)
-    {
-        // The following casts cause some weird issue with emscripten and cause
-        // a performance hit. XXX: look into this later.
-        MODRM_ENTRY16_0(0, DS, (int32_t)(reg16 + BX), (int32_t)(reg16 + SI))
-        MODRM_ENTRY16_0(1, DS, (int32_t)(reg16 + BX), (int32_t)(reg16 + DI))
-        MODRM_ENTRY16_0(2, SS, (int32_t)(reg16 + BP), (int32_t)(reg16 + SI))
-        MODRM_ENTRY16_0(3, SS, (int32_t)(reg16 + BP), (int32_t)(reg16 + DI))
-        MODRM_ENTRY16_1(4, DS, (int32_t)(reg16 + SI))
-        MODRM_ENTRY16_1(5, DS, (int32_t)(reg16 + DI))
-
-        // special case
-        MODRM_ENTRY(0x00 | 6, gen_modrm_entry_2())
-        MODRM_ENTRY(0x40 | 6, gen_modrm_entry_1(SS, (int32_t)(reg16 + BP), read_imm8s()))
-        MODRM_ENTRY(0x80 | 6, gen_modrm_entry_1(SS, (int32_t)(reg16 + BP), read_imm16()))
-
-        MODRM_ENTRY16_1(7, DS, (int32_t)(reg16 + BX))
-
-        default:
-            assert(false);
-    }
-}
-
-#define MODRM_ENTRY32_0(row, seg, reg)\
-    MODRM_ENTRY(0x00 | (row), gen_modrm32_entry(seg, reg, 0))\
-    MODRM_ENTRY(0x40 | (row), gen_modrm32_entry(seg, reg, read_imm8s()))\
-    MODRM_ENTRY(0x80 | (row), gen_modrm32_entry(seg, reg, read_imm32s()))
-
-static void gen_modrm32_entry(int32_t segment, int32_t reg32s_idx, int32_t imm)
-{
-    // generates: fn ( reg + imm )
-    wg_load_aligned_i32(instruction_body, reg32s_idx);
-
-    if(imm)
-    {
-        wg_push_i32(instruction_body, imm);
-        wg_add_i32(instruction_body);
-    }
-
-    jit_add_seg_offset(segment);
-}
-
-static void jit_resolve_sib(bool mod)
-{
-    uint8_t sib_byte = read_imm8();
-    uint8_t r = sib_byte & 7;
-    uint8_t m = sib_byte >> 3 & 7;
-
-    int32_t base_addr;
-    int32_t base;
-    uint8_t seg;
-    bool base_is_mem_access = true;
-
-    if(r == 4)
-    {
-        base_addr = (int32_t)(reg32s + ESP);
-        seg = SS;
-    }
-    else if(r == 5)
-    {
-        if(mod)
-        {
-            base_addr = (int32_t)(reg32s + EBP);
-            seg = SS;
-        }
-        else
-        {
-            base = read_imm32s();
-            seg = DS;
-            base_is_mem_access = false;
-        }
-    }
-    else
-    {
-        base_addr = (int32_t)(reg32s + r);
-        seg = DS;
-    }
-
-    // generate: get_seg_prefix(seg) + base
-    // Where base is accessed from memory if base_is_mem_access or written as a constant otherwise
-    if(base_is_mem_access)
-    {
-        wg_load_aligned_i32(instruction_body, base_addr);
-    }
-    else
-    {
-        wg_push_i32(instruction_body, base);
-    }
-
-    jit_add_seg_offset(seg);
-
-    // We now have to generate an offset value to add
-
-    if(m == 4)
-    {
-        // offset is 0, we don't need to add anything
-        return;
-    }
-
-    // Offset is reg32s[m] << s, where s is:
-
-    uint8_t s = sib_byte >> 6 & 3;
-
-    wg_load_aligned_i32(instruction_body, (int32_t)(reg32s + m));
-    wg_push_i32(instruction_body, s);
-    wg_shl_i32(instruction_body);
-
-    wg_add_i32(instruction_body);
-}
-
-static void modrm32_special_case_1(void)
-{
-    jit_resolve_sib(true);
-
-    int32_t imm = read_imm8s();
-
-    if(imm)
-    {
-        wg_push_i32(instruction_body, imm);
-        wg_add_i32(instruction_body);
-    }
-}
-
-static void modrm32_special_case_2(void)
-{
-    jit_resolve_sib(true);
-
-    int32_t imm = read_imm32s();
-
-    if(imm)
-    {
-        wg_push_i32(instruction_body, imm);
-        wg_add_i32(instruction_body);
-    }
-}
-
-static void gen_modrm32_entry_1()
-{
-    int32_t imm = read_imm32s();
-
-    wg_push_i32(instruction_body, imm);
-    jit_add_seg_offset(DS);
-}
-
-static void jit_resolve_modrm32_(int32_t modrm_byte)
-{
-    switch(modrm_byte)
-    {
-        MODRM_ENTRY32_0(0, DS, (int32_t)(reg32s + EAX))
-        MODRM_ENTRY32_0(1, DS, (int32_t)(reg32s + ECX))
-        MODRM_ENTRY32_0(2, DS, (int32_t)(reg32s + EDX))
-        MODRM_ENTRY32_0(3, DS, (int32_t)(reg32s + EBX))
-
-        // special cases
-        MODRM_ENTRY(0x00 | 4, jit_resolve_sib(false))
-        MODRM_ENTRY(0x40 | 4, modrm32_special_case_1())
-        MODRM_ENTRY(0x80 | 4, modrm32_special_case_2())
-        MODRM_ENTRY(0x00 | 5, gen_modrm32_entry_1())
-        MODRM_ENTRY(0x40 | 5, gen_modrm32_entry(SS, (int32_t)(reg32s + EBP), read_imm8s()))
-        MODRM_ENTRY(0x80 | 5, gen_modrm32_entry(SS, (int32_t)(reg32s + EBP), read_imm32s()))
-
-        MODRM_ENTRY32_0(6, DS, (int32_t)(reg32s + ESI))
-        MODRM_ENTRY32_0(7, DS, (int32_t)(reg32s + EDI))
-
-        default:
-            assert(false);
-    }
-}
-
-#undef MODRM_ENTRY
-
-// This function leaves a value on the wasm stack, to be consumed by one of the
-// gen_modrm_fn* functions below
-void gen_modrm_resolve(int32_t modrm_byte)
-{
-    if(is_asize_32())
-    {
-        jit_resolve_modrm32_(modrm_byte);
-    }
-    else
-    {
-        jit_resolve_modrm16_(modrm_byte);
-    }
-}
-
-void gen_modrm_fn2(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1)
-{
-    // generates: fn( _, arg0, arg1 )
-
-    wg_push_i32(instruction_body, arg0);
-    wg_push_i32(instruction_body, arg1);
-
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN3_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_modrm_fn1(char const* fn, uint8_t fn_len, int32_t arg0)
-{
-    // generates: fn( _, arg0 )
-
-    wg_push_i32(instruction_body, arg0);
-
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN2_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-
-void gen_modrm_fn0(char const* fn, uint8_t fn_len)
-{
-    // generates: fn( _ )
-
-    int32_t fn_idx = get_fn_idx(fn, fn_len, FN1_TYPE_INDEX);
-    wg_call_fn(instruction_body, fn_idx);
-}
-

+ 0 - 87
src/native/codegen/codegen.h

@@ -1,87 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-#include "wasmgen.h"
-
-#define FN0_TYPE_INDEX 0
-#define FN1_TYPE_INDEX 1
-#define FN2_TYPE_INDEX 2
-#define FN3_TYPE_INDEX 3
-
-#define FN0_RET_TYPE_INDEX 4
-#define FN1_RET_TYPE_INDEX 5
-#define FN2_RET_TYPE_INDEX 6
-
-#define NR_FN_TYPE_INDEXES 7
-
-// We'll need to scale the index on the stack to access arr32[i] correctly, for eg.
-// &array32[i]'s byte address is "array32 + i*4"
-// This macro simply does the "i*4" part of the address calculation
-#define SCALE_INDEX_FOR_ARRAY32(array)                                  \
-    _Static_assert(                                                     \
-        sizeof((array)[0]) == 4,                                        \
-        "codegen: Elements assumed to be 4 bytes."                      \
-    );                                                                  \
-    /* Shift the index to make it byte-indexed, not array-indexed */    \
-    wg_push_i32(instruction_body, 2);                                   \
-    wg_shl_i32(instruction_body);
-
-uint8_t* cs;
-uint8_t* instruction_body;
-
-static uint16_t const fn_get_seg_idx = 0;
-
-void gen_reset(void);
-// uintptr_t gen_finish(int32_t no_of_locals_i32);
-void add_get_seg_import(void);
-
-uint16_t get_fn_idx(char const* fn, uint8_t fn_len, uint8_t fn_type);
-
-// Generate function call with constant arguments
-void gen_fn0_const(char const* fn, uint8_t fn_len);
-void gen_fn1_const(char const* fn, uint8_t fn_len, int32_t arg0);
-void gen_fn2_const(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1);
-void gen_fn3_const(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1, int32_t arg2);
-
-void gen_fn0_const_ret(char const* fn, uint8_t fn_len);
-void gen_fn1_const_ret(char const* fn, uint8_t fn_len, int32_t arg0);
-
-// Generate code to set register value to result of function call
-void gen_set_reg16_fn0(char const* fn, uint8_t fn_len, int32_t reg);
-void gen_set_reg32s_fn0(char const* fn, uint8_t fn_len, int32_t reg);
-
-// Generate code for "mov reg, reg"
-void gen_set_reg16_r(int32_t r_dest, int32_t r_src);
-void gen_set_reg32_r(int32_t r_dest, int32_t r_src);
-
-// Generate function call with register value as argument (reg is index of register)
-void gen_fn1_reg16(char const* fn, uint8_t fn_len, int32_t reg);
-void gen_fn1_reg32s(char const* fn, uint8_t fn_len, int32_t reg);
-
-// Generate a function call with arguments pushed to the stack separately
-void gen_call_fn1_ret(char const* fn, uint8_t fn_len);
-void gen_call_fn1(char const* fn, uint8_t fn_len);
-void gen_call_fn2(char const* fn, uint8_t fn_len);
-
-// Generate code for safe_read32s and safe_write32 inline
-void gen_safe_read32(void);
-void gen_safe_write32(int32_t local_for_address, int32_t local_for_value);
-
-void gen_modrm_resolve(int32_t modrm_byte);
-void gen_modrm_fn0(char const* fn, uint8_t fn_len);
-void gen_modrm_fn1(char const* fn, uint8_t fn_len, int32_t arg0);
-void gen_modrm_fn2(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1);
-
-void gen_increment_mem32(int32_t addr);
-
-void gen_relative_jump(int32_t n);
-
-void gen_set_previous_eip_offset_from_eip(int32_t n);
-void gen_set_previous_eip(void);
-void gen_increment_instruction_pointer(int32_t);
-
-void gen_increment_timestamp_counter(uint32_t);
-
-void gen_clear_prefixes(void);
-void gen_add_prefix_bits(int32_t);

+ 0 - 74
src/native/codegen/wasmgen.h

@@ -1,74 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-typedef struct PackedStr {
-    uint64_t a;
-    uint64_t b;
-    uint64_t c;
-} PackedStr;
-
-#define PSTR_TY uint64_t, uint64_t, uint64_t
-
-extern uint8_t* wg_get_code_section(void);
-extern uint8_t* wg_get_instruction_body(void);
-extern void wg_commit_instruction_body_to_cs(void);
-extern void wg_finish(uint8_t no_of_locals_i32);
-extern void wg_reset(void);
-extern uint16_t wg_get_fn_idx(PSTR_TY, uint8_t fn_type);
-
-extern void wg_push_i32(uint8_t* buf, int32_t v);
-extern void wg_push_u32(uint8_t* buf, uint32_t v);
-extern void wg_load_aligned_u16(uint8_t* buf, uint32_t addr);
-extern void wg_load_aligned_i32(uint8_t* buf, uint32_t addr);
-extern void wg_store_aligned_u16(uint8_t* buf);
-extern void wg_store_aligned_i32(uint8_t* buf);
-extern void wg_add_i32(uint8_t* buf);
-extern void wg_and_i32(uint8_t* buf);
-extern void wg_or_i32(uint8_t* buf);
-extern void wg_shl_i32(uint8_t* buf);
-extern void wg_call_fn(uint8_t* buf, uint16_t fn_idx);
-extern void wg_call_fn_with_arg(uint8_t* buf, uint16_t fn_idx, int32_t arg0);
-extern void wg_eq_i32(uint8_t* buf);
-extern void wg_ne_i32(uint8_t* buf);
-extern void wg_le_i32(uint8_t* buf);
-extern void wg_lt_i32(uint8_t* buf);
-extern void wg_ge_i32(uint8_t* buf);
-extern void wg_gt_i32(uint8_t* buf);
-extern void wg_if_i32(uint8_t* buf);
-extern void wg_block_i32(uint8_t* buf);
-extern void wg_tee_local(uint8_t* buf, int32_t idx);
-extern void wg_xor_i32(uint8_t* buf);
-extern void wg_load_unaligned_i32_from_stack(uint8_t* buf, uint32_t byte_offset);
-extern void wg_load_aligned_i32_from_stack(uint8_t* buf, uint32_t byte_offset);
-extern void wg_store_unaligned_i32(uint8_t* buf, uint32_t byte_offset);
-extern void wg_shr_u32(uint8_t* buf);
-extern void wg_shr_i32(uint8_t* buf);
-extern void wg_eqz_i32(uint8_t* buf);
-extern void wg_if_void(uint8_t* buf);
-extern void wg_else(uint8_t* buf);
-extern void wg_loop_void(uint8_t* buf);
-extern void wg_block_void(uint8_t* buf);
-extern void wg_block_end(uint8_t* buf);
-extern void wg_return(uint8_t* buf);
-extern void wg_drop(uint8_t* buf);
-extern void wg_brtable_and_cases(uint8_t* buf, int32_t cases_count);
-extern void wg_br(uint8_t* buf, int32_t depth);
-extern void wg_get_local(uint8_t* buf, int32_t idx);
-extern void wg_set_local(uint8_t* buf, int32_t idx);
-extern void wg_unreachable(uint8_t* buf);
-extern void wg_increment_mem32(uint8_t* buf, int32_t addr);
-extern void wg_increment_variable(uint8_t* buf, int32_t addr, int32_t n);
-extern void wg_load_aligned_u16_from_stack(uint8_t* buf, uint32_t byte_offset);
-
-extern void wg_fn0_const(uint8_t* buf, PSTR_TY);
-extern void wg_fn0_const_ret(uint8_t* buf, PSTR_TY);
-extern void wg_fn1_const(uint8_t* buf, PSTR_TY, int32_t arg0);
-extern void wg_fn1_const_ret(uint8_t* buf, PSTR_TY, int32_t arg0);
-extern void wg_fn2_const(uint8_t* buf, PSTR_TY, int32_t arg0, int32_t arg1);
-extern void wg_fn3_const(uint8_t* buf, PSTR_TY, int32_t arg0, int32_t arg1, int32_t arg2);
-extern void wg_call_fn1_ret(uint8_t* buf, PSTR_TY);
-extern void wg_call_fn2(uint8_t* buf, PSTR_TY);
-
-#undef PSTR_TY
-

+ 0 - 17
src/native/config.h

@@ -16,26 +16,9 @@
 
 #define USE_A20 false
 
-// Mask used to map physical address to index in cache array
-#define JIT_CACHE_ARRAY_SIZE 0x40000
-#define JIT_CACHE_ARRAY_MASK (JIT_CACHE_ARRAY_SIZE - 1)
-
-#define HASH_PRIME 6151
-
-#define JIT_THRESHOLD 2500
-
-#define CHECK_JIT_CACHE_ARRAY_INVARIANTS 0
 #define CHECK_TLB_INVARIANTS 0
 
-#define JIT_MAX_ITERATIONS_PER_FUNCTION 10000
-
 #define ENABLE_JIT 1
-#define ENABLE_JIT_NONFAULTING_OPTIMZATION 1
-#define JIT_ALWAYS_USE_LOOP_SAFETY 0
-
-#ifndef ENABLE_JIT_ALWAYS
-#define ENABLE_JIT_ALWAYS 0
-#endif
 
 #define ENABLE_PROFILER 0
 #define ENABLE_PROFILER_OPSTATS 0

+ 0 - 4
src/native/const.h

@@ -151,7 +151,3 @@
 #define A20_MASK32 (~(1 << (20 - 2)))
 
 #define MXCSR_MASK (0xFFFF & ~(1 << 6))
-
-#define WASM_TABLE_SIZE 0x10000
-#define DIRTY_ARR_SHIFT 12
-#define GROUP_DIRTINESS_LENGTH (1 << (32 - DIRTY_ARR_SHIFT))

File diff suppressed because it is too large
+ 204 - 1398
src/native/cpu.c


+ 4 - 104
src/native/cpu.h

@@ -8,8 +8,6 @@
 #include "config.h"
 #include "shared.h"
 
-#define CODE_CACHE_SEARCH_SIZE 8
-
 union reg128 {
     int8_t i8[16];
     int16_t i16[8];
@@ -37,86 +35,10 @@ _Static_assert(sizeof(union reg64) == 8, "reg64 is 8 bytes");
 
 typedef uint8_t cached_state_flags;
 
-struct code_cache {
-    // Address of the start of the basic block
-    uint32_t start_addr;
-#if DEBUG
-    // Address of the instruction immediately after the basic block ends
-    uint32_t end_addr;
-    int32_t opcode[1];
-    int32_t len;
-    int32_t virt_addr;
-#endif
-
-    // an index into jit_cache_arr for the next code_cache entry within the same physical page
-    int32_t next_index_same_page;
-
-    uint16_t wasm_table_index;
-    uint16_t initial_state;
-    cached_state_flags state_flags;
-    bool pending;
-};
-#if DEBUG
-#else
-_Static_assert(sizeof(struct code_cache) == 16, "code_cache uses 16 bytes");
-#endif
-struct code_cache jit_cache_arr[JIT_CACHE_ARRAY_SIZE];
-
-// XXX: Remove this limitation when page_entry_points is sparse
-#define MAX_PHYSICAL_PAGES (512 << 20 >> 12)
-
-#define MAX_ENTRIES_PER_PAGE 128
-#define ENTRY_POINT_END 0xFFFF
-
-uint16_t page_entry_points[MAX_PHYSICAL_PAGES][MAX_ENTRIES_PER_PAGE];
-
 // Flag indicating whether the instruction that just ran was at a block's boundary (jump,
 // state-altering, etc.)
 extern uint32_t jit_block_boundary;
 
-typedef uint32_t jit_instr_flags;
-
-#define JIT_INSTR_BLOCK_BOUNDARY_FLAG (1 << 0)
-#define JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG (1 << 1)
-#define JIT_INSTR_NONFAULTING_FLAG (1 << 2)
-#define JIT_INSTR_IMM_JUMP16_FLAG (1 << 3)
-#define JIT_INSTR_IMM_JUMP32_FLAG (1 << 4)
-
-struct analysis {
-    jit_instr_flags flags;
-    int32_t jump_offset;
-    int32_t condition_index;
-};
-
-struct basic_block {
-    int32_t addr;
-    int32_t end_addr;
-    int32_t next_block_addr; // if 0 this is an exit block
-    int32_t next_block_branch_taken_addr;
-    int32_t condition_index; // if not -1 this block ends with a conditional jump
-    int32_t jump_offset;
-    bool jump_offset_is_32;
-    bool is_entry_block;
-};
-
-#define BASIC_BLOCK_LIST_MAX 1000
-
-struct basic_block_list {
-    int32_t length;
-    struct basic_block blocks[BASIC_BLOCK_LIST_MAX];
-};
-
-// Count of how many times prime_hash(address) has been called through a jump
-extern int32_t hot_code_addresses[HASH_PRIME];
-
-#define JIT_CACHE_ARRAY_NO_NEXT_ENTRY (-1)
-
-uint16_t wasm_table_index_free_list[WASM_TABLE_SIZE];
-int32_t wasm_table_index_free_list_count;
-
-uint16_t wasm_table_index_pending_free[WASM_TABLE_SIZE];
-int32_t wasm_table_index_pending_free_count;
-
 #define VALID_TLB_ENTRY_MAX 10000
 int32_t valid_tlb_entries[VALID_TLB_ENTRY_MAX];
 int32_t valid_tlb_entries_count;
@@ -128,29 +50,11 @@ int32_t valid_tlb_entries_count;
 #define TLB_GLOBAL (1 << 4)
 #define TLB_HAS_CODE (1 << 5)
 
-// Indices for local variables and function arguments (which are accessed as local variables) for
-// the generated WASM function
-#define GEN_LOCAL_ARG_INITIAL_STATE 0
-#define GEN_LOCAL_STATE 1
-#define GEN_LOCAL_ITERATION_COUNTER 2
-// local scratch variables for use wherever required
-#define GEN_LOCAL_SCRATCH0 3
-#define GEN_LOCAL_SCRATCH1 4
-#define GEN_LOCAL_SCRATCH2 5
-// Function arguments are not included in the local variable count
-#define GEN_NO_OF_LOCALS 5
-
 // defined in call-indirect.ll
 extern void call_indirect(int32_t index);
 extern void call_indirect1(int32_t index, int32_t arg);
 
 void after_block_boundary(void);
-struct analysis analyze_step(int32_t);
-
-void after_jump(void);
-void diverged(void);
-void branch_taken(void);
-void branch_not_taken(void);
 
 bool same_page(int32_t, int32_t);
 
@@ -158,6 +62,7 @@ int32_t get_eflags(void);
 uint32_t translate_address_read(int32_t address);
 uint32_t translate_address_write(int32_t address);
 void tlb_set_has_code(uint32_t physical_page, bool has_code);
+void check_tlb_invariants(void);
 
 void writable_or_pagefault(int32_t addr, int32_t size);
 int32_t read_imm8(void);
@@ -174,19 +79,12 @@ int32_t get_seg_prefix_ds(int32_t offset);
 int32_t get_seg_prefix_ss(int32_t offset);
 int32_t get_seg_prefix_cs(int32_t offset);
 int32_t modrm_resolve(int32_t modrm_byte);
-void modrm_skip(int32_t modrm_byte);
 
-void check_jit_cache_array_invariants(void);
-
-uint32_t jit_hot_hash_page(uint32_t page);
-void jit_link_block(int32_t target);
-void jit_link_block_conditional(int32_t offset, const char* condition);
 void cycle_internal(void);
 void run_prefix_instruction(void);
-jit_instr_flags jit_prefix_instruction(void);
 void clear_prefixes(void);
 void segment_prefix_op(int32_t seg);
-jit_instr_flags segment_prefix_op_jit(int32_t seg);
+
 bool has_flat_segmentation(void);
 void do_many_cycles_unsafe(void);
 void raise_exception(int32_t interrupt_nr);
@@ -243,3 +141,5 @@ void set_tsc(uint32_t, uint32_t);
 uint64_t read_tsc(void);
 bool vm86_mode(void);
 int32_t getiopl(void);
+
+int32_t get_opstats_buffer(int32_t index);

+ 6 - 10
src/native/global_pointers.h

@@ -86,17 +86,13 @@ static union reg64* const reg_mmx = (union reg64* const) 1064; // length 64
 
 // gap
 
-static uint8_t* const codegen_buffer_op = (uint8_t* const) 0x1000; // length 0x100000
-static uint8_t* const codegen_buffer_cs = (uint8_t* const) 0x101000; // length 0x100000
-static uint8_t* const codegen_buffer_instruction_body = (uint8_t* const) 0x201000; // length 0x100000
-static uint8_t* const codegen_string_input = (uint8_t* const) 0x301000; // length 32
+static uint32_t* const opstats_buffer = (uint32_t* const) 0x1000; // length 0x400
+static uint32_t* const opstats_buffer_0f = (uint32_t* const) 0x1400; // length 0x400
 
 // gap
 
-static int32_t* const tlb_data = (int32_t* const) (0x400000); // length 0x100000*4
-// A mapping from physical page to index into jit_cache_arr
-static int32_t* const page_first_jit_cache_entry = (int32_t* const) (0x800000); // length 0x100000*4
+static int32_t* const tlb_data = (int32_t* const) 0x400000; // length 0x100000*4
 
-static uint8_t* const mem8 = (uint8_t* const) (0x400000 + 0x100000 * 8);
-static uint16_t* const mem16 = (uint16_t* const) (0x400000 + 0x100000 * 8);
-static int32_t* const mem32s = (int32_t* const) (0x400000 + 0x100000 * 8);
+static uint8_t* const mem8 = (uint8_t* const) 0x800000;
+static uint16_t* const mem16 = (uint16_t* const) 0x800000;
+static int32_t* const mem32s = (int32_t* const) 0x800000;

+ 0 - 274
src/native/instructions.c

@@ -4,8 +4,6 @@
 #include <stdint.h>
 
 #include "arith.h"
-#include "codegen/codegen.h"
-#include "codegen/wasmgen.h"
 #include "const.h"
 #include "cpu.h"
 #include "fpu.h"
@@ -64,13 +62,6 @@ void instr32_0F() {
     run_instruction0f_32(read_imm8());
 }
 
-jit_instr_flags instr16_0F_jit() {
-    return jit_instruction0f_16(read_imm8());
-}
-jit_instr_flags instr32_0F_jit() {
-    return jit_instruction0f_32(read_imm8());
-}
-
 
 DEFINE_MODRM_INSTR_READ_WRITE_8(instr_10, adc8(___, read_reg8(r)))
 DEFINE_MODRM_INSTR_READ_WRITE_16(instr16_11, adc16(___, read_reg16(r)))
@@ -131,7 +122,6 @@ void instr32_25(int32_t imm32) { reg32s[EAX] = and32(reg32s[EAX], imm32); }
 
 
 void instr_26() { segment_prefix_op(ES); }
-jit_instr_flags instr_26_jit() { return segment_prefix_op_jit(ES); }
 void instr_27() { bcd_daa(); }
 
 DEFINE_MODRM_INSTR_READ_WRITE_8(instr_28, sub8(___, read_reg8(r)))
@@ -145,7 +135,6 @@ void instr16_2D(int32_t imm16) { reg16[AX] = sub16(reg16[AX], imm16); }
 void instr32_2D(int32_t imm32) { reg32s[EAX] = sub32(reg32s[EAX], imm32); }
 
 void instr_2E() { segment_prefix_op(CS); }
-jit_instr_flags instr_2E_jit() { return segment_prefix_op_jit(CS); }
 void instr_2F() { bcd_das(); }
 
 DEFINE_MODRM_INSTR_READ_WRITE_8(instr_30, xor8(___, read_reg8(r)))
@@ -159,7 +148,6 @@ void instr16_35(int32_t imm16) { reg16[AX] = xor16(reg16[AX], imm16); }
 void instr32_35(int32_t imm32) { reg32s[EAX] = xor32(reg32s[EAX], imm32); }
 
 void instr_36() { segment_prefix_op(SS); }
-jit_instr_flags instr_36_jit() { return segment_prefix_op_jit(SS); }
 void instr_37() { bcd_aaa(); }
 
 DEFINE_MODRM_INSTR_READ8(instr_38, cmp8(___, read_reg8(r)))
@@ -173,7 +161,6 @@ void instr16_3D(int32_t imm16) { cmp16(reg16[AX], imm16); }
 void instr32_3D(int32_t imm32) { cmp32(reg32s[EAX], imm32); }
 
 void instr_3E() { segment_prefix_op(DS); }
-jit_instr_flags instr_3E_jit() { return segment_prefix_op_jit(DS); }
 void instr_3F() { bcd_aas(); }
 
 
@@ -247,39 +234,6 @@ void instr32_5E() { reg32s[ESI] = pop32s(); }
 void instr16_5F() { reg16[DI] = pop16(); }
 void instr32_5F() { reg32s[EDI] = pop32s(); }
 
-void instr16_50_jit() { push16_reg_jit(AX); }
-void instr32_50_jit() { push32_reg_jit(EAX); }
-void instr16_51_jit() { push16_reg_jit(CX); }
-void instr32_51_jit() { push32_reg_jit(ECX); }
-void instr16_52_jit() { push16_reg_jit(DX); }
-void instr32_52_jit() { push32_reg_jit(EDX); }
-void instr16_53_jit() { push16_reg_jit(BX); }
-void instr32_53_jit() { push32_reg_jit(EBX); }
-void instr16_54_jit() { push16_reg_jit(SP); }
-void instr32_54_jit() { push32_reg_jit(ESP); }
-void instr16_55_jit() { push16_reg_jit(BP); }
-void instr32_55_jit() { push32_reg_jit(EBP); }
-void instr16_56_jit() { push16_reg_jit(SI); }
-void instr32_56_jit() { push32_reg_jit(ESI); }
-void instr16_57_jit() { push16_reg_jit(DI); }
-void instr32_57_jit() { push32_reg_jit(EDI); }
-
-void instr16_58_jit() { pop16_reg_jit(AX); }
-void instr32_58_jit() { pop32s_reg_jit(EAX); }
-void instr16_59_jit() { pop16_reg_jit(CX); }
-void instr32_59_jit() { pop32s_reg_jit(ECX); }
-void instr16_5A_jit() { pop16_reg_jit(DX); }
-void instr32_5A_jit() { pop32s_reg_jit(EDX); }
-void instr16_5B_jit() { pop16_reg_jit(BX); }
-void instr32_5B_jit() { pop32s_reg_jit(EBX); }
-void instr16_5D_jit() { pop16_reg_jit(BP); }
-void instr32_5D_jit() { pop32s_reg_jit(EBP); }
-void instr16_5E_jit() { pop16_reg_jit(SI); }
-void instr32_5E_jit() { pop32s_reg_jit(ESI); }
-void instr16_5F_jit() { pop16_reg_jit(DI); }
-void instr32_5F_jit() { pop32s_reg_jit(EDI); }
-
-
 void instr16_60() { pusha16(); }
 void instr32_60() { pusha32(); }
 void instr16_61() { popa16(); }
@@ -298,9 +252,7 @@ void instr_62_mem(int32_t addr, int32_t r) {
 DEFINE_MODRM_INSTR_READ_WRITE_16(instr_63, arpl(___, read_reg16(r)))
 
 void instr_64() { segment_prefix_op(FS); }
-jit_instr_flags instr_64_jit() { return segment_prefix_op_jit(FS); }
 void instr_65() { segment_prefix_op(GS); }
-jit_instr_flags instr_65_jit() { return segment_prefix_op_jit(GS); }
 
 void instr_66() {
     // Operand-size override prefix
@@ -308,18 +260,6 @@ void instr_66() {
     run_prefix_instruction();
     *prefixes = 0;
 }
-jit_instr_flags instr_66_jit() {
-    // Operand-size override prefix
-
-    // This affects both decoding and instructions at runtime, so we set
-    // prefixes directly *and* in the generated code
-    *prefixes |= PREFIX_MASK_OPSIZE;
-    gen_add_prefix_bits(PREFIX_MASK_OPSIZE);
-    jit_instr_flags instr_flags = jit_prefix_instruction();
-    *prefixes = 0;
-    gen_clear_prefixes();
-    return instr_flags;
-}
 
 void instr_67() {
     // Address-size override prefix
@@ -328,26 +268,10 @@ void instr_67() {
     run_prefix_instruction();
     *prefixes = 0;
 }
-jit_instr_flags instr_67_jit() {
-    // Address-size override prefix
-
-    // This affects both decoding and instructions at runtime, so we set
-    // prefixes directly *and* in the generated code
-    dbg_assert(is_asize_32() == *is_32);
-    *prefixes |= PREFIX_MASK_ADDRSIZE;
-    gen_add_prefix_bits(PREFIX_MASK_ADDRSIZE);
-    jit_instr_flags instr_flags = jit_prefix_instruction();
-    *prefixes = 0;
-    gen_clear_prefixes();
-    return instr_flags;
-}
 
 void instr16_68(int32_t imm16) { push16(imm16); }
 void instr32_68(int32_t imm32) { push32(imm32); }
 
-void instr16_68_jit(int32_t imm16) { push16_imm_jit(imm16); }
-void instr32_68_jit(int32_t imm32) { push32_imm_jit(imm32); }
-
 void instr16_69_mem(int32_t addr, int32_t r, int32_t imm) { write_reg16(r, imul_reg16(safe_read16(addr) << 16 >> 16, imm << 16 >> 16)); }
 void instr16_69_reg(int32_t r1, int32_t r, int32_t imm) { write_reg16(r, imul_reg16(read_reg16(r1) << 16 >> 16, imm << 16 >> 16)); }
 void instr32_69_mem(int32_t addr, int32_t r, int32_t imm) { write_reg32(r, imul_reg32(safe_read32s(addr), imm)); }
@@ -356,9 +280,6 @@ void instr32_69_reg(int32_t r1, int32_t r, int32_t imm) { write_reg32(r, imul_re
 void instr16_6A(int32_t imm8) { push16(imm8); }
 void instr32_6A(int32_t imm8) { push32(imm8); }
 
-void instr16_6A_jit(int32_t imm8) { push16_imm_jit(imm8); }
-void instr32_6A_jit(int32_t imm8) { push32_imm_jit(imm8); }
-
 void instr16_6B_mem(int32_t addr, int32_t r, int32_t imm) { write_reg16(r, imul_reg16(safe_read16(addr) << 16 >> 16, imm)); }
 void instr16_6B_reg(int32_t r1, int32_t r, int32_t imm) { write_reg16(r, imul_reg16(read_reg16(r1) << 16 >> 16, imm)); }
 void instr32_6B_mem(int32_t addr, int32_t r, int32_t imm) { write_reg32(r, imul_reg32(safe_read32s(addr), imm)); }
@@ -405,40 +326,6 @@ void instr32_7D(int32_t imm8) { jmpcc32(!test_l(), imm8); }
 void instr32_7E(int32_t imm8) { jmpcc32( test_le(), imm8); }
 void instr32_7F(int32_t imm8) { jmpcc32(!test_le(), imm8); }
 
-void instr16_70_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_o"); }
-void instr16_71_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_no"); }
-void instr16_72_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_b"); }
-void instr16_73_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nb"); }
-void instr16_74_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_z"); }
-void instr16_75_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nz"); }
-void instr16_76_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_be"); }
-void instr16_77_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nbe"); }
-void instr16_78_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_s"); }
-void instr16_79_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_ns"); }
-void instr16_7A_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_p"); }
-void instr16_7B_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_np"); }
-void instr16_7C_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_l"); }
-void instr16_7D_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nl"); }
-void instr16_7E_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_le"); }
-void instr16_7F_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nle"); }
-
-void instr32_70_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_o"); }
-void instr32_71_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_no"); }
-void instr32_72_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_b"); }
-void instr32_73_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nb"); }
-void instr32_74_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_z"); }
-void instr32_75_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nz"); }
-void instr32_76_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_be"); }
-void instr32_77_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nbe"); }
-void instr32_78_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_s"); }
-void instr32_79_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_ns"); }
-void instr32_7A_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_p"); }
-void instr32_7B_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_np"); }
-void instr32_7C_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_l"); }
-void instr32_7D_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nl"); }
-void instr32_7E_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_le"); }
-void instr32_7F_jit(int32_t imm8) { jit_link_block_conditional(imm8, "test_nle"); }
-
 DEFINE_MODRM_INSTR2_READ_WRITE_8(instr_80_0, add8(___, imm))
 DEFINE_MODRM_INSTR2_READ_WRITE_8(instr_80_1,  or8(___, imm))
 DEFINE_MODRM_INSTR2_READ_WRITE_8(instr_80_2, adc8(___, imm))
@@ -514,65 +401,10 @@ void instr16_89_mem(int32_t addr, int32_t r) { safe_write16(addr, read_reg16(r))
 void instr32_89_reg(int32_t r2, int32_t r) { write_reg32(r2, read_reg32(r)); }
 void instr32_89_mem(int32_t addr, int32_t r) { safe_write32(addr, read_reg32(r)); }
 
-void instr16_89_reg_jit(int32_t r_dest, int32_t r_src)
-{
-    gen_set_reg16_r(get_reg16_index(r_dest), get_reg16_index(r_src));
-}
-void instr32_89_reg_jit(int32_t r_dest, int32_t r_src)
-{
-    gen_set_reg32_r(r_dest, r_src);
-}
-
-void instr16_89_mem_jit(int32_t modrm_byte, int32_t r)
-{
-    // XXX
-    gen_modrm_resolve(modrm_byte); gen_modrm_fn1("instr16_89_mem", 14, modrm_byte >> 3 & 7);
-}
-void instr32_89_mem_jit(int32_t modrm_byte, int32_t r)
-{
-    // Pseudo: safe_write32(modrm_resolve(modrm_byte), reg32s[r]);
-    const int32_t address_local = GEN_LOCAL_SCRATCH0;
-    const int32_t value_local = GEN_LOCAL_SCRATCH1;
-
-    gen_modrm_resolve(modrm_byte);
-    wg_set_local(instruction_body, address_local);
-
-    wg_push_i32(instruction_body, (uint32_t) &reg32s[r]);
-    wg_load_aligned_i32_from_stack(instruction_body, 0);
-    wg_set_local(instruction_body, value_local);
-
-    gen_safe_write32(address_local, value_local);
-}
-
 DEFINE_MODRM_INSTR_READ8(instr_8A, write_reg8(r, ___))
 DEFINE_MODRM_INSTR_READ16(instr16_8B, write_reg16(r, ___))
 DEFINE_MODRM_INSTR_READ32(instr32_8B, write_reg32(r, ___))
 
-void instr16_8B_reg_jit(int32_t r_src, int32_t r_dest)
-{
-    gen_set_reg16_r(get_reg16_index(r_dest), get_reg16_index(r_src));
-}
-void instr32_8B_reg_jit(int32_t r_src, int32_t r_dest)
-{
-    gen_set_reg32_r(r_dest, r_src);
-}
-
-void instr16_8B_mem_jit(int32_t modrm_byte, int32_t r)
-{
-    // XXX
-    gen_modrm_resolve(modrm_byte); gen_modrm_fn1("instr16_8B_mem", 14, modrm_byte >> 3 & 7);
-}
-void instr32_8B_mem_jit(int32_t modrm_byte, int32_t r)
-{
-    // Pseudo: reg32s[r] = safe_read32s(modrm_resolve(modrm_byte));
-    wg_push_i32(instruction_body, (int32_t) &reg32s[r]);
-
-    gen_modrm_resolve(modrm_byte);
-    gen_safe_read32();
-
-    wg_store_aligned_i32(instruction_body);
-}
-
 void instr_8C_check_sreg(int32_t sreg) {
     if(sreg >= 6)
     {
@@ -617,28 +449,6 @@ void instr32_8D_mem(int32_t addr, int32_t r) {
     *prefixes = 0;
 }
 
-void instr16_8D_mem_jit(int32_t modrm_byte)
-{
-    int32_t loc = (int32_t) &reg16[get_reg16_index(modrm_byte >> 3 & 7)];
-    wg_push_u32(instruction_body, loc);
-    // override prefix, so modrm_resolve does not return the segment part
-    *prefixes |= SEG_PREFIX_ZERO;
-    gen_modrm_resolve(modrm_byte);
-    wg_store_aligned_u16(instruction_body);
-    *prefixes = 0;
-}
-
-void instr32_8D_mem_jit(int32_t modrm_byte)
-{
-    int32_t loc = (int32_t) &reg32s[modrm_byte >> 3 & 7];
-    wg_push_u32(instruction_body, loc);
-    // override prefix, so modrm_resolve does not return the segment part
-    *prefixes |= SEG_PREFIX_ZERO;
-    gen_modrm_resolve(modrm_byte);
-    wg_store_aligned_i32(instruction_body);
-    *prefixes = 0;
-}
-
 void instr_8E_helper(int32_t data, int32_t mod)
 {
     if(mod == ES || mod == SS || mod == DS || mod == FS || mod == GS)
@@ -700,30 +510,6 @@ void instr32_8F_0_reg(int32_t r)
     write_reg32(r, pop32s());
 }
 
-void instr16_8F_0_jit_mem(int32_t modrm_byte)
-{
-    gen_fn0_const("instr16_8F_0_mem_pre", 20);
-    gen_modrm_resolve(modrm_byte);
-    gen_modrm_fn0("instr16_8F_0_mem", 16);
-}
-
-void instr16_8F_0_jit_reg(int32_t r)
-{
-    gen_fn1_const("instr16_8F_0_reg", 16, r);
-}
-
-void instr32_8F_0_jit_mem(int32_t modrm_byte)
-{
-    gen_fn0_const("instr32_8F_0_mem_pre", 20);
-    gen_modrm_resolve(modrm_byte);
-    gen_modrm_fn0("instr32_8F_0_mem", 16);
-}
-
-void instr32_8F_0_jit_reg(int32_t r)
-{
-    gen_fn1_const("instr32_8F_0_reg", 16, r);
-}
-
 void instr_90() { }
 void instr16_91() { xchg16r(CX); }
 void instr32_91() { xchg32r(ECX); }
@@ -1495,13 +1281,6 @@ void instr32_E8(int32_t imm32s) {
     instruction_pointer[0] = instruction_pointer[0] + imm32s;
     //dbg_assert(is_asize_32() || get_real_eip() < 0x10000);
 }
-void instr16_E8_jit(int32_t imm16) {
-    gen_fn1_const("instr16_E8", 10, imm16);
-}
-
-void instr32_E8_jit(int32_t imm32s) {
-    gen_fn1_const("instr32_E8", 10, imm32s);
-}
 
 void instr16_E9(int32_t imm16) {
     // jmp
@@ -1512,12 +1291,6 @@ void instr32_E9(int32_t imm32s) {
     instruction_pointer[0] = instruction_pointer[0] + imm32s;
     dbg_assert(is_asize_32() || get_real_eip() < 0x10000);
 }
-void instr16_E9_jit(int32_t imm16) {
-    gen_fn1_const("instr16_E9", 10, imm16);
-}
-void instr32_E9_jit(int32_t imm32s) {
-    gen_fn1_const("instr32_E9", 10, imm32s);
-}
 
 void instr16_EA(int32_t new_ip, int32_t cs) {
     // jmpf
@@ -1541,12 +1314,6 @@ void instr32_EB(int32_t imm8) {
     dbg_assert(is_asize_32() || get_real_eip() < 0x10000);
 }
 
-void instr16_EB_jit(int32_t imm8s) {
-    gen_fn1_const("instr16_EB", 10, imm8s);
-}
-void instr32_EB_jit(int32_t imm8s) {
-    gen_fn1_const("instr32_EB", 10, imm8s);
-}
 
 void instr_EC() {
     int32_t port = reg16[DX];
@@ -1588,15 +1355,6 @@ void instr_F0() {
     // some instructions that don't write to memory
     run_prefix_instruction();
 }
-jit_instr_flags instr_F0_jit() {
-    // lock
-    //dbg_log("lock");
-
-    // TODO
-    // This triggers UD when used with
-    // some instructions that don't write to memory
-    return jit_prefix_instruction();
-}
 void instr_F1() {
     // INT1
     // https://code.google.com/p/corkami/wiki/x86oddities#IceBP
@@ -1611,16 +1369,6 @@ void instr_F2() {
     run_prefix_instruction();
     *prefixes = 0;
 }
-jit_instr_flags instr_F2_jit() {
-    // repnz
-    dbg_assert((*prefixes & PREFIX_MASK_REP) == 0);
-    gen_add_prefix_bits(PREFIX_REPNZ);
-    *prefixes |= PREFIX_REPNZ;
-    jit_instr_flags instr_flags = jit_prefix_instruction();
-    gen_clear_prefixes();
-    *prefixes = 0;
-    return instr_flags;
-}
 
 void instr_F3() {
     // repz
@@ -1630,17 +1378,6 @@ void instr_F3() {
     *prefixes = 0;
 }
 
-jit_instr_flags instr_F3_jit() {
-    // repz
-    dbg_assert((*prefixes & PREFIX_MASK_REP) == 0);
-    gen_add_prefix_bits(PREFIX_REPZ);
-    *prefixes |= PREFIX_REPZ;
-    jit_instr_flags instr_flags = jit_prefix_instruction();
-    gen_clear_prefixes();
-    *prefixes = 0;
-    return instr_flags;
-}
-
 void instr_F4() {
     hlt_op();
 }
@@ -1809,8 +1546,6 @@ void instr16_FF_5_mem(int32_t addr)
     dbg_assert(is_asize_32() || get_real_eip() < 0x10000);
 }
 DEFINE_MODRM_INSTR1_READ16(instr16_FF_6, push16(___))
-void instr16_FF_6_jit_reg(int32_t reg) { push16_reg_jit(reg); }
-void instr16_FF_6_jit_mem(int32_t modrm_byte) { push16_mem_jit(modrm_byte); }
 
 DEFINE_MODRM_INSTR1_READ_WRITE_32(instr32_FF_0, inc32(___))
 DEFINE_MODRM_INSTR1_READ_WRITE_32(instr32_FF_1, dec32(___))
@@ -1877,19 +1612,10 @@ void instr32_FF_5_mem(int32_t addr)
     dbg_assert(is_asize_32() || new_ip < 0x10000);
 }
 DEFINE_MODRM_INSTR1_READ32(instr32_FF_6, push32(___))
-void instr32_FF_6_jit_reg(int32_t reg) { push32_reg_jit(reg); }
-void instr32_FF_6_jit_mem(int32_t modrm_byte) { push32_mem_jit(modrm_byte); }
 
 void run_instruction(int32_t opcode)
 {
 #include "../../build/interpreter.c"
 }
 
-jit_instr_flags jit_instruction(int32_t opcode)
-{
-    jit_instr_flags instr_flags = 0;
-#include "../../build/jit.c"
-    return instr_flags;
-}
-
 #pragma clang diagnostic pop

+ 1 - 16
src/native/instructions.h

@@ -155,8 +155,6 @@ void instr16_0E(void);
 void instr32_0E(void);
 void instr16_0F(void);
 void instr32_0F(void);
-jit_instr_flags instr16_0F_jit(void);
-jit_instr_flags instr32_0F_jit(void);
 void instr_10_mem(int32_t addr, int32_t r);
 void instr_10_reg(int32_t r1, int32_t r);
 void instr16_11_mem(int32_t addr, int32_t r);
@@ -211,7 +209,6 @@ void instr_24(int32_t imm8);
 void instr16_25(int32_t imm16);
 void instr32_25(int32_t imm32);
 void instr_26(void);
-jit_instr_flags instr_26_jit(void);
 void instr_27(void);
 void instr_28_mem(int32_t addr, int32_t r);
 void instr_28_reg(int32_t r1, int32_t r);
@@ -229,7 +226,6 @@ void instr_2C(int32_t imm8);
 void instr16_2D(int32_t imm16);
 void instr32_2D(int32_t imm32);
 void instr_2E(void);
-jit_instr_flags instr_2E_jit(void);
 void instr_2F(void);
 void instr_30_mem(int32_t addr, int32_t r);
 void instr_30_reg(int32_t r1, int32_t r);
@@ -247,7 +243,6 @@ void instr_34(int32_t imm8);
 void instr16_35(int32_t imm16);
 void instr32_35(int32_t imm32);
 void instr_36(void);
-jit_instr_flags instr_36_jit(void);
 void instr_37(void);
 void instr_38_mem(int32_t addr, int32_t r);
 void instr_38_reg(int32_t r1, int32_t r);
@@ -265,7 +260,6 @@ void instr_3C(int32_t imm8);
 void instr16_3D(int32_t imm16);
 void instr32_3D(int32_t imm32);
 void instr_3E(void);
-jit_instr_flags instr_3E_jit(void);
 void instr_3F(void);
 void instr16_40(void);
 void instr32_40(void);
@@ -340,13 +334,9 @@ void instr_62_mem(int32_t addr, int32_t r);
 void instr_63_mem(int32_t addr, int32_t r);
 void instr_63_reg(int32_t r1, int32_t r);
 void instr_64(void);
-jit_instr_flags instr_64_jit(void);
 void instr_65(void);
-jit_instr_flags instr_65_jit(void);
 void instr_66(void);
-jit_instr_flags instr_66_jit(void);
 void instr_67(void);
-jit_instr_flags instr_67_jit(void);
 void instr16_68(int32_t imm16);
 void instr32_68(int32_t imm32);
 void instr16_69_mem(int32_t addr, int32_t r, int32_t imm);
@@ -512,8 +502,6 @@ void instr16_8D_mem(int32_t addr, int32_t r);
 void instr32_8D_reg(int32_t r, int32_t r2);
 void instr32_8D_mem_pre(void);
 void instr32_8D_mem(int32_t addr, int32_t r);
-void instr16_8D_mem_jit(int32_t modrm_byte);
-void instr32_8D_mem_jit(int32_t modrm_byte);
 void instr_8E_helper(int32_t data, int32_t mod);
 void instr_8E_mem(int32_t addr, int32_t r);
 void instr_8E_reg(int32_t r1, int32_t r);
@@ -803,12 +791,9 @@ void instr_EE(void);
 void instr16_EF(void);
 void instr32_EF(void);
 void instr_F0(void);
-jit_instr_flags instr_F0_jit(void);
 void instr_F1(void);
 void instr_F2(void);
-jit_instr_flags instr_F2_jit(void);
 void instr_F3(void);
-jit_instr_flags instr_F3_jit(void);
 void instr_F4(void);
 void instr_F5(void);
 void instr_F6_0_mem(int32_t addr, int32_t imm);
@@ -901,5 +886,5 @@ void instr32_FF_5_reg(int32_t r);
 void instr32_FF_5_mem(int32_t addr);
 void instr32_FF_6_mem(int32_t addr);
 void instr32_FF_6_reg(int32_t r1);
+
 void run_instruction(int32_t opcode);
-jit_instr_flags jit_instruction(int32_t opcode);

+ 2 - 52
src/native/instructions_0f.c

@@ -5,7 +5,6 @@
 #include <stdlib.h>
 
 #include "arith.h"
-#include "codegen/codegen.h"
 #include "const.h"
 #include "cpu.h"
 #include "fpu.h"
@@ -1969,41 +1968,6 @@ void instr32_0F8E(int32_t imm) { jmpcc32( test_le(), imm); }
 void instr16_0F8F(int32_t imm) { jmpcc16(!test_le(), imm); }
 void instr32_0F8F(int32_t imm) { jmpcc32(!test_le(), imm); }
 
-void instr16_0F80_jit(int32_t imm) { jit_link_block_conditional(imm, "test_o"); }
-void instr16_0F81_jit(int32_t imm) { jit_link_block_conditional(imm, "test_no"); }
-void instr16_0F82_jit(int32_t imm) { jit_link_block_conditional(imm, "test_b"); }
-void instr16_0F83_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nb"); }
-void instr16_0F84_jit(int32_t imm) { jit_link_block_conditional(imm, "test_z"); }
-void instr16_0F85_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nz"); }
-void instr16_0F86_jit(int32_t imm) { jit_link_block_conditional(imm, "test_be"); }
-void instr16_0F87_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nbe"); }
-void instr16_0F88_jit(int32_t imm) { jit_link_block_conditional(imm, "test_s"); }
-void instr16_0F89_jit(int32_t imm) { jit_link_block_conditional(imm, "test_ns"); }
-void instr16_0F8A_jit(int32_t imm) { jit_link_block_conditional(imm, "test_p"); }
-void instr16_0F8B_jit(int32_t imm) { jit_link_block_conditional(imm, "test_np"); }
-void instr16_0F8C_jit(int32_t imm) { jit_link_block_conditional(imm, "test_l"); }
-void instr16_0F8D_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nl"); }
-void instr16_0F8E_jit(int32_t imm) { jit_link_block_conditional(imm, "test_le"); }
-void instr16_0F8F_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nle"); }
-
-
-void instr32_0F80_jit(int32_t imm) { jit_link_block_conditional(imm, "test_o"); }
-void instr32_0F81_jit(int32_t imm) { jit_link_block_conditional(imm, "test_no"); }
-void instr32_0F82_jit(int32_t imm) { jit_link_block_conditional(imm, "test_b"); }
-void instr32_0F83_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nb"); }
-void instr32_0F84_jit(int32_t imm) { jit_link_block_conditional(imm, "test_z"); }
-void instr32_0F85_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nz"); }
-void instr32_0F86_jit(int32_t imm) { jit_link_block_conditional(imm, "test_be"); }
-void instr32_0F87_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nbe"); }
-void instr32_0F88_jit(int32_t imm) { jit_link_block_conditional(imm, "test_s"); }
-void instr32_0F89_jit(int32_t imm) { jit_link_block_conditional(imm, "test_ns"); }
-void instr32_0F8A_jit(int32_t imm) { jit_link_block_conditional(imm, "test_p"); }
-void instr32_0F8B_jit(int32_t imm) { jit_link_block_conditional(imm, "test_np"); }
-void instr32_0F8C_jit(int32_t imm) { jit_link_block_conditional(imm, "test_l"); }
-void instr32_0F8D_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nl"); }
-void instr32_0F8E_jit(int32_t imm) { jit_link_block_conditional(imm, "test_le"); }
-void instr32_0F8F_jit(int32_t imm) { jit_link_block_conditional(imm, "test_nle"); }
-
 // setcc
 void instr_0F90_reg(int32_t r, int32_t unused) { setcc_reg( test_o(), r); }
 void instr_0F91_reg(int32_t r, int32_t unused) { setcc_reg(!test_o(), r); }
@@ -3035,8 +2999,8 @@ void instr_660FE5(union reg128 source, int32_t r) {
 }
 DEFINE_SSE_SPLIT(instr_660FE5, safe_read128s, read_xmm128s)
 
-void instr_0FE6_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
-void instr_0FE6_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
+void instr_0FE6_mem(int32_t addr, int32_t r) { trigger_ud(); }
+void instr_0FE6_reg(int32_t r1, int32_t r2) { trigger_ud(); }
 void instr_660FE6_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
 void instr_660FE6_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
 void instr_F20FE6_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
@@ -3708,18 +3672,4 @@ void run_instruction0f_32(int32_t opcode)
 #include "../../build/interpreter0f_32.c"
 }
 
-jit_instr_flags jit_instruction0f_16(int32_t opcode)
-{
-    jit_instr_flags instr_flags = 0;
-#include "../../build/jit0f_16.c"
-    return instr_flags;
-}
-
-jit_instr_flags jit_instruction0f_32(int32_t opcode)
-{
-    jit_instr_flags instr_flags = 0;
-#include "../../build/jit0f_32.c"
-    return instr_flags;
-}
-
 #pragma clang diagnostic pop

+ 0 - 2
src/native/instructions_0f.h

@@ -998,5 +998,3 @@ void instr_660FFE_mem(int32_t addr, int32_t r);
 void instr_0FFF(void);
 void run_instruction0f_16(int32_t opcode);
 void run_instruction0f_32(int32_t opcode);
-jit_instr_flags jit_instruction0f_16(int32_t opcode);
-jit_instr_flags jit_instruction0f_32(int32_t opcode);

+ 0 - 377
src/native/jit.c

@@ -1,377 +0,0 @@
-#include <stdint.h>
-
-#include "const.h"
-#include "cpu.h"
-#include "global_pointers.h"
-#include "jit.h"
-#include "js_imports.h"
-#include "log.h"
-#include "profiler/profiler.h"
-
-
-void free_wasm_table_index(uint16_t wasm_table_index)
-{
-#if DEBUG
-    for(int32_t i = 0; i < wasm_table_index_free_list_count; i++)
-    {
-        assert(wasm_table_index_free_list[i] != wasm_table_index);
-    }
-#endif
-
-    assert(wasm_table_index_free_list_count < WASM_TABLE_SIZE);
-    wasm_table_index_free_list[wasm_table_index_free_list_count++] = wasm_table_index;
-
-    // It is not strictly necessary to clear the function, but it will fail
-    // more predictably if we accidentally use the function
-    // XXX: This fails in Chromium:
-    //   RangeError: WebAssembly.Table.set(): Modifying existing entry in table not supported.
-    //jit_clear_func(wasm_table_index);
-}
-
-// remove the entry with the given index from the jit_cache_arr structure
-void remove_jit_cache_entry(uint32_t page, int32_t addr_index)
-{
-    assert(addr_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
-    assert(page == (jit_cache_arr[addr_index].start_addr >> 12));
-
-    int32_t page_index = page_first_jit_cache_entry[page];
-    bool did_remove = false;
-
-    if(page_index == addr_index)
-    {
-        page_first_jit_cache_entry[page] = jit_cache_arr[addr_index].next_index_same_page;
-        did_remove = true;
-    }
-    else
-    {
-        while(page_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
-        {
-            int32_t next_index = jit_cache_arr[page_index].next_index_same_page;
-            if(next_index == addr_index)
-            {
-                jit_cache_arr[page_index].next_index_same_page = jit_cache_arr[addr_index].next_index_same_page;
-                did_remove = true;
-                break;
-            }
-            page_index = next_index;
-        }
-    }
-
-    jit_cache_arr[addr_index].next_index_same_page = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
-
-    assert(did_remove);
-}
-
-// remove all entries with the given wasm_table_index from the jit_cache_arr structure
-void remove_jit_cache_wasm_index(int32_t page, uint16_t wasm_table_index)
-{
-    int32_t cache_array_index = page_first_jit_cache_entry[page];
-
-    assert(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
-
-    bool pending = false;
-
-    do
-    {
-        struct code_cache* entry = &jit_cache_arr[cache_array_index];
-        int32_t next_cache_array_index = entry->next_index_same_page;
-
-        if(entry->wasm_table_index == wasm_table_index)
-        {
-            // if one entry is pending, all must be pending
-            dbg_assert(!pending || entry->pending);
-
-            pending = entry->pending;
-
-            remove_jit_cache_entry(page, cache_array_index);
-
-            assert(entry->next_index_same_page == JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
-            entry->wasm_table_index = 0;
-            entry->start_addr = 0;
-            entry->pending = false;
-        }
-
-        cache_array_index = next_cache_array_index;
-    }
-    while(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
-
-    if(pending)
-    {
-        assert(wasm_table_index_pending_free_count < WASM_TABLE_SIZE);
-        wasm_table_index_pending_free[wasm_table_index_pending_free_count++] = wasm_table_index;
-    }
-    else
-    {
-        free_wasm_table_index(wasm_table_index);
-    }
-
-    if(page_first_jit_cache_entry[page] == JIT_CACHE_ARRAY_NO_NEXT_ENTRY &&
-        page_entry_points[page][0] == ENTRY_POINT_END)
-    {
-        tlb_set_has_code(page, false);
-    }
-
-#if CHECK_JIT_CACHE_ARRAY_INVARIANTS
-    // sanity check that the above iteration deleted all entries
-
-    for(int32_t i = 0; i < JIT_CACHE_ARRAY_SIZE; i++)
-    {
-        struct code_cache* entry = &jit_cache_arr[i];
-
-        assert(entry->wasm_table_index != wasm_table_index);
-    }
-#endif
-}
-
-bool find_u16(const uint16_t* array, uint16_t value, int32_t length)
-{
-    for(int32_t i = 0; i < length; i++)
-    {
-        if(array[i] == value)
-        {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-__attribute__((noinline))
-void jit_clear_page(uint32_t index)
-{
-    assert(index < MAX_PHYSICAL_PAGES);
-    int32_t cache_array_index = page_first_jit_cache_entry[index];
-
-    assert(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
-
-    uint16_t index_to_free[100];
-    int32_t index_to_free_length = 0;
-
-    uint16_t index_to_pending_free[100];
-    int32_t index_to_pending_free_length = 0;
-
-    page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
-    profiler_stat_increment(S_INVALIDATE_PAGE);
-
-    do
-    {
-        profiler_stat_increment(S_INVALIDATE_CACHE_ENTRY);
-        struct code_cache* entry = &jit_cache_arr[cache_array_index];
-        uint16_t wasm_table_index = entry->wasm_table_index;
-
-        assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr));
-
-        int32_t next_cache_array_index = entry->next_index_same_page;
-
-        entry->next_index_same_page = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
-        entry->start_addr = 0;
-        entry->wasm_table_index = 0;
-
-        if(entry->pending)
-        {
-            entry->pending = false;
-
-            if(!find_u16(index_to_pending_free, wasm_table_index, index_to_pending_free_length))
-            {
-                assert(index_to_pending_free_length < 100);
-                index_to_pending_free[index_to_pending_free_length++] = wasm_table_index;
-            }
-        }
-        else
-        {
-            if(!find_u16(index_to_free, wasm_table_index, index_to_free_length))
-            {
-                assert(index_to_free_length < 100);
-                index_to_free[index_to_free_length++] = wasm_table_index;
-            }
-        }
-
-        cache_array_index = next_cache_array_index;
-    }
-    while(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
-
-    for(int32_t i = 0; i < index_to_free_length; i++)
-    {
-        free_wasm_table_index(index_to_free[i]);
-    }
-
-    for(int32_t i = 0; i < index_to_pending_free_length; i++)
-    {
-        uint16_t wasm_table_index = index_to_pending_free[i];
-        assert(wasm_table_index_pending_free_count < WASM_TABLE_SIZE);
-        wasm_table_index_pending_free[wasm_table_index_pending_free_count++] = wasm_table_index;
-    }
-}
-
-void jit_dirty_index(uint32_t index)
-{
-    assert(index < MAX_PHYSICAL_PAGES);
-
-    bool did_have_code = false;
-    int32_t cache_array_index = page_first_jit_cache_entry[index];
-
-    if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
-    {
-        did_have_code = true;
-        jit_clear_page(index);
-    }
-
-    uint16_t* entry_points = page_entry_points[index];
-
-    if(entry_points[0] != ENTRY_POINT_END)
-    {
-        did_have_code = true;
-
-        // don't try to compile code in this page anymore until it's hot again
-        hot_code_addresses[jit_hot_hash_page(index)] = 0;
-
-        for(int32_t i = 0; i < MAX_ENTRIES_PER_PAGE; i++)
-        {
-            if(entry_points[i] == ENTRY_POINT_END)
-            {
-                break;
-            }
-
-            entry_points[i] = ENTRY_POINT_END;
-        }
-
-#if DEBUG
-        for(int32_t i = 0; i < MAX_ENTRIES_PER_PAGE; i++)
-        {
-            assert(entry_points[i] == ENTRY_POINT_END);
-        }
-#endif
-    }
-
-    if(did_have_code)
-    {
-        tlb_set_has_code(index, false);
-    }
-}
-
-/*
- * There are 3 primary ways a cached basic block will be dirtied:
- * 1. A write dirties basic block A independently (A is clean and
- * write came from outside A)
- * 2. A write from within basic block A dirties itself
- * 3. A run_instruction during compilation dirties itself
-
- * #3 won't happen with generate_instruction so we don't
- * account for it
- */
-void jit_dirty_cache(uint32_t start_addr, uint32_t end_addr)
-{
-#if ENABLE_JIT
-    assert(start_addr <= end_addr);
-    for(uint32_t i = start_addr; i < end_addr; i++)
-    {
-        uint32_t index = i >> DIRTY_ARR_SHIFT;
-        // XXX: Should only call once per index
-        jit_dirty_index(index);
-    }
-#endif
-}
-
-void jit_dirty_cache_small(uint32_t start_addr, uint32_t end_addr)
-{
-#if ENABLE_JIT
-    assert(start_addr <= end_addr);
-
-    uint32_t start_index = start_addr >> DIRTY_ARR_SHIFT;
-    uint32_t end_index = (end_addr - 1) >> DIRTY_ARR_SHIFT;
-
-    jit_dirty_index(start_index);
-
-    // Note: This can't happen when paging is enabled, as writes across
-    //       boundaries are split up on two pages
-    if(start_index != end_index)
-    {
-        assert(end_index == start_index + 1);
-        jit_dirty_index(end_index);
-    }
-#endif
-}
-
-void jit_dirty_cache_single(uint32_t addr)
-{
-#if ENABLE_JIT
-    uint32_t index = addr >> DIRTY_ARR_SHIFT;
-
-    jit_dirty_index(index);
-#endif
-}
-
-void jit_empty_cache()
-{
-    for(int32_t i = 0; i < JIT_CACHE_ARRAY_SIZE; i++)
-    {
-        jit_cache_arr[i].start_addr = 0;
-        jit_cache_arr[i].next_index_same_page = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
-        jit_cache_arr[i].wasm_table_index = 0;
-        jit_cache_arr[i].pending = false;
-    }
-
-    for(int32_t i = 0; i < GROUP_DIRTINESS_LENGTH; i++)
-    {
-        page_first_jit_cache_entry[i] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
-    }
-
-    for(int32_t i = 0; i < MAX_PHYSICAL_PAGES; i++)
-    {
-        uint16_t* entry_points = page_entry_points[i];
-
-        for(int32_t j = 0; j < MAX_ENTRIES_PER_PAGE; j++)
-        {
-            entry_points[j] = ENTRY_POINT_END;
-        }
-    }
-
-    for(int32_t i = 0; i < 0xFFFF; i++)
-    {
-        // don't assign 0 (XXX: Check)
-        wasm_table_index_free_list[i] = i + 1;
-    }
-
-    wasm_table_index_free_list_count = 0xFFFF;
-}
-
-int32_t jit_unused_cache_stat()
-{
-    int32_t count = 0;
-
-    for(int32_t i = 0; i < JIT_CACHE_ARRAY_SIZE; i++)
-    {
-        struct code_cache* entry = &jit_cache_arr[i];
-        int32_t phys_addr = entry->start_addr;
-
-        if(phys_addr == 0)
-        {
-            count++;
-        }
-    }
-
-    return count;
-}
-
-int32_t jit_get_entry_length(int32_t i)
-{
-    assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE);
-#if DEBUG
-    return jit_cache_arr[i].len;
-#else
-    UNUSED(i);
-    return 0;
-#endif
-}
-
-int32_t jit_get_entry_address(int32_t i)
-{
-    assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE);
-    return jit_cache_arr[i].start_addr;
-}
-
-int32_t jit_get_entry_pending(int32_t i)
-{
-    assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE);
-    return jit_cache_arr[i].pending;
-}

+ 0 - 12
src/native/jit.h

@@ -1,12 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-void free_wasm_table_index(uint16_t wasm_table_index);
-void remove_jit_cache_wasm_index(int32_t page, uint16_t wasm_table_index);
-
-void remove_jit_cache_entry(uint32_t page, int32_t addr_index);
-void jit_dirty_cache(uint32_t start_addr, uint32_t end_addr);
-void jit_dirty_cache_single(uint32_t addr);
-void jit_dirty_cache_small(uint32_t start_addr, uint32_t end_addr);
-void jit_empty_cache(void);

+ 0 - 4
src/native/js_imports.h

@@ -25,10 +25,6 @@ extern int32_t set_cr0(int32_t);
 extern int32_t verr(int32_t);
 extern int32_t verw(int32_t);
 
-extern void codegen_finalize(int32_t, int32_t, int32_t, int32_t, int32_t);
-extern void log_uncompiled_code(int32_t, int32_t);
-extern void dump_function_code(const struct basic_block* basic_block, int32_t basic_block_count, int32_t end);
-
 extern void cpl_changed(void);
 extern void cpuid(void);
 extern void enter16(int32_t, int32_t);

+ 1 - 1
src/native/memory.c

@@ -5,11 +5,11 @@
 
 #include "const.h"
 #include "global_pointers.h"
-#include "jit.h"
 #include "js_imports.h"
 #include "log.h"
 #include "memory.h"
 #include "profiler/profiler.h"
+#include "rust_imports.h"
 
 bool in_mapped_range(uint32_t addr)
 {

+ 0 - 101
src/native/misc_instr.c

@@ -4,7 +4,6 @@
 #include <stdint.h>
 #include <stdio.h>
 
-#include "codegen/codegen.h"
 #include "const.h"
 #include "cpu.h"
 #include "fpu.h"
@@ -209,44 +208,6 @@ void push16(int32_t imm16)
     }
 }
 
-void push16_reg_jit(int32_t reg)
-{
-    if(*stack_size_32)
-    {
-        gen_fn1_reg16("push16_ss32", 11, reg);
-    }
-    else
-    {
-        gen_fn1_reg16("push16_ss16", 11, reg);
-    }
-}
-
-void push16_imm_jit(int32_t imm)
-{
-    if(*stack_size_32)
-    {
-        gen_fn1_const("push16_ss32", 11, imm);
-    }
-    else
-    {
-        gen_fn1_const("push16_ss16", 11, imm);
-    }
-}
-
-void push16_mem_jit(int32_t modrm_byte)
-{
-    if(*stack_size_32)
-    {
-        gen_modrm_resolve(modrm_byte);
-        gen_modrm_fn0("push16_ss32_mem", 15);
-    }
-    else
-    {
-        gen_modrm_resolve(modrm_byte);
-        gen_modrm_fn0("push16_ss16_mem", 15);
-    }
-}
-
 __attribute__((always_inline))
 void push32_ss16(int32_t imm32)
 {
@@ -279,44 +240,6 @@ void push32(int32_t imm32)
     }
 }
 
-void push32_reg_jit(int32_t reg)
-{
-    if(*stack_size_32)
-    {
-        gen_fn1_reg32s("push32_ss32", 11, reg);
-    }
-    else
-    {
-        gen_fn1_reg32s("push32_ss16", 11, reg);
-    }
-}
-
-void push32_imm_jit(int32_t imm)
-{
-    if(*stack_size_32)
-    {
-        gen_fn1_const("push32_ss32", 11, imm);
-    }
-    else
-    {
-        gen_fn1_const("push32_ss16", 11, imm);
-    }
-}
-
-void push32_mem_jit(int32_t modrm_byte)
-{
-    if(*stack_size_32)
-    {
-        gen_modrm_resolve(modrm_byte);
-        gen_modrm_fn0("push32_ss32_mem", 15);
-    }
-    else
-    {
-        gen_modrm_resolve(modrm_byte);
-        gen_modrm_fn0("push32_ss16_mem", 15);
-    }
-}
-
 __attribute__((always_inline))
 int32_t pop16_ss16()
 {
@@ -350,18 +273,6 @@ int32_t pop16()
     }
 }
 
-void pop16_reg_jit(int32_t reg)
-{
-    if(*stack_size_32)
-    {
-        gen_set_reg16_fn0("pop16_ss32", 10, reg);
-    }
-    else
-    {
-        gen_set_reg16_fn0("pop16_ss16", 10, reg);
-    }
-}
-
 __attribute__((always_inline))
 int32_t pop32s_ss16()
 {
@@ -393,18 +304,6 @@ int32_t pop32s()
     }
 }
 
-void pop32s_reg_jit(int32_t reg)
-{
-    if(*stack_size_32)
-    {
-        gen_set_reg32s_fn0("pop32s_ss32", 11, reg);
-    }
-    else
-    {
-        gen_set_reg32s_fn0("pop32s_ss16", 11, reg);
-    }
-}
-
 void pusha16()
 {
     uint16_t temp = reg16[SP];

+ 4 - 8
src/native/misc_instr.h

@@ -45,28 +45,24 @@ void cmovcc16(bool condition, int32_t value, int32_t r);
 void cmovcc32(bool condition, int32_t value, int32_t r);
 int32_t get_stack_pointer(int32_t offset);
 void adjust_stack_reg(int32_t adjustment);
+
 void push16_ss16(int32_t imm16);
 void push16_ss32(int32_t imm16);
 void push16_ss16_mem(int32_t addr);
 void push16_ss32_mem(int32_t addr);
 void push16(int32_t imm16);
-void push16_reg_jit(int32_t reg);
-void push16_imm_jit(int32_t imm);
-void push16_mem_jit(int32_t modrm_byte);
+
 void push32_ss16(int32_t imm32);
 void push32_ss32(int32_t imm32);
 void push32_ss16_mem(int32_t addr);
 void push32_ss32_mem(int32_t addr);
 void push32(int32_t imm32);
-void push32_reg_jit(int32_t reg);
-void push32_imm_jit(int32_t imm);
-void push32_mem_jit(int32_t modrm_byte);
+
 int32_t pop16(void);
-void pop16_reg_jit(int32_t reg);
 int32_t pop32_ss16(void);
 int32_t pop32_ss32(void);
 int32_t pop32s(void);
-void pop32s_reg_jit(int32_t reg);
+
 void pusha16(void);
 void pusha32(void);
 void setcc_reg(bool condition, int32_t r);

+ 0 - 82
src/native/profiler/opstats.c

@@ -1,82 +0,0 @@
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "../codegen/codegen.h"
-#include "../shared.h"
-
-#if ENABLE_PROFILER_OPSTATS
-
-static struct {
-    uint32_t opcode[0x100];
-    uint32_t opcode_0f[0x100];
-} opstats_buffer = {
-    .opcode = { 0 },
-    .opcode_0f = { 0 },
-};
-
-#endif
-
-void gen_opstats(uint32_t instruction)
-{
-#if ENABLE_PROFILER_OPSTATS
-    bool is_0f = false;
-
-    for(int32_t i = 0; i < 4; i++)
-    {
-        int32_t opcode = instruction & 0xFF;
-        instruction >>= 8;
-
-        // TODO:
-        // - If instruction depends on middle bits of modrm_byte, split
-        // - Split depending on memory or register variant
-        // - If the instruction uses 4 or more prefixes, only the prefixes will be counted
-
-        if(is_0f)
-        {
-            gen_increment_mem32((int32_t)&opstats_buffer.opcode_0f[opcode]);
-            break;
-        }
-        else
-        {
-            gen_increment_mem32((int32_t)&opstats_buffer.opcode[opcode]);
-
-            if(opcode == 0x0F)
-            {
-                is_0f = true;
-            }
-            else if(opcode == 0x26 || opcode == 0x2E || opcode == 0x36 || opcode == 0x3E ||
-                    opcode == 0x64 || opcode == 0x65 || opcode == 0x66 || opcode == 0x67 ||
-                    opcode == 0xF0 || opcode == 0xF2 || opcode == 0xF3)
-            {
-                // prefix
-            }
-            else
-            {
-                break;
-            }
-        }
-    }
-#else
-    UNUSED(instruction);
-#endif
-}
-
-int32_t get_opstats_buffer(int32_t index)
-{
-    assert(index >= 0 && index < 0x200);
-
-#if ENABLE_PROFILER_OPSTATS
-    if(index < 0x100)
-    {
-        return opstats_buffer.opcode[index];
-    }
-    else
-    {
-        return opstats_buffer.opcode_0f[index - 0x100];
-    }
-#else
-    UNUSED(index);
-    return 0;
-#endif
-}

+ 0 - 5
src/native/profiler/opstats.h

@@ -1,5 +0,0 @@
-#pragma once
-#include <stdint.h>
-
-void gen_opstats(uint32_t instruction);
-int32_t get_opstats_buffer(int32_t index);

+ 1 - 1
src/native/profiler/profiler.h

@@ -15,7 +15,7 @@ enum stat_name {
     S_RUN_INTERPRETED,
     S_RUN_INTERPRETED_PENDING,
     S_RUN_INTERPRETED_NEAR_END_OF_PAGE,
-    S_RUN_INTERPRETED_NOT_HOT,
+    S_RUN_INTERPRETED_DIFFERENT_STATE,
     S_RUN_INTERPRETED_STEPS,
 
     S_RUN_FROM_CACHE,

+ 17 - 0
src/native/rust_imports.h

@@ -0,0 +1,17 @@
+#pragma once
+
+#include "cpu.h"
+#include <stdint.h>
+
+uint32_t jit_find_cache_entry(uint32_t phys_addr, cached_state_flags flags);
+void jit_increase_hotness_and_maybe_compile(uint32_t phys_addr, uint32_t cs_offset, cached_state_flags flags);
+
+void jit_dirty_cache_single(uint32_t phys_addr);
+void jit_dirty_cache_small(uint32_t phys_start_addr, uint32_t phys_end_addr);
+
+bool jit_page_has_code(uint32_t physical_page);
+
+uint32_t jit_unused_cache_stat(void);
+uint32_t jit_get_entry_length(int32_t i);
+uint32_t jit_get_entry_address(int32_t i);
+bool jit_get_entry_pending(int32_t i);

+ 0 - 9
src/native/shared.h

@@ -6,13 +6,4 @@
 
 #define UNUSED(x) (void)(x)
 
-static inline size_t strlen(const char *str)
-{
-    const char *s;
-
-    for (s = str; *s; ++s) {}
-
-    return (s - str);
-}
-
 void *memset(void *dest, int c, size_t n);

+ 96 - 0
src/rust/analysis.rs

@@ -0,0 +1,96 @@
+#![allow(non_snake_case)]
+
+use cpu_context::CpuContext;
+use prefix::{PREFIX_66, PREFIX_67, PREFIX_F2, PREFIX_F3};
+use regs::{CS, DS, ES, FS, GS, SS};
+
+#[derive(PartialEq, Eq)]
+pub enum AnalysisType {
+    Normal,
+    BlockBoundary,
+    Jump {
+        offset: i32,
+        is_32: bool,
+        condition: Option<u8>,
+    },
+}
+
+pub struct Analysis {
+    pub no_next_instruction: bool,
+    pub ty: AnalysisType,
+}
+
+pub fn analyze_step(mut cpu: &mut CpuContext) -> Analysis {
+    let mut analysis = Analysis {
+        no_next_instruction: false,
+        ty: AnalysisType::Normal,
+    };
+    cpu.prefixes = 0;
+    let opcode = cpu.read_imm8() as u32 | (cpu.osize_32() as u32) << 8;
+    ::gen::analyzer::analyzer(opcode, &mut cpu, &mut analysis);
+    analysis
+}
+
+pub fn analyze_step_handle_prefix(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    ::gen::analyzer::analyzer(
+        cpu.read_imm8() as u32 | (cpu.osize_32() as u32) << 8,
+        cpu,
+        analysis,
+    )
+}
+pub fn analyze_step_handle_segment_prefix(
+    segment: u32,
+    cpu: &mut CpuContext,
+    analysis: &mut Analysis,
+) {
+    assert!(segment <= 5);
+    cpu.prefixes |= segment + 1;
+    analyze_step_handle_prefix(cpu, analysis)
+}
+
+pub fn instr16_0F_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    ::gen::analyzer0f_16::analyzer(cpu.read_imm8(), cpu, analysis)
+}
+pub fn instr32_0F_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    ::gen::analyzer0f_32::analyzer(cpu.read_imm8(), cpu, analysis)
+}
+pub fn instr_26_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    analyze_step_handle_segment_prefix(ES, cpu, analysis)
+}
+pub fn instr_2E_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    analyze_step_handle_segment_prefix(CS, cpu, analysis)
+}
+pub fn instr_36_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    analyze_step_handle_segment_prefix(SS, cpu, analysis)
+}
+pub fn instr_3E_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    analyze_step_handle_segment_prefix(DS, cpu, analysis)
+}
+pub fn instr_64_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    analyze_step_handle_segment_prefix(FS, cpu, analysis)
+}
+pub fn instr_65_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    analyze_step_handle_segment_prefix(GS, cpu, analysis)
+}
+pub fn instr_66_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    cpu.prefixes |= PREFIX_66;
+    analyze_step_handle_prefix(cpu, analysis)
+}
+pub fn instr_67_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    cpu.prefixes |= PREFIX_67;
+    analyze_step_handle_prefix(cpu, analysis)
+}
+pub fn instr_F0_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    // lock: Ignored
+    analyze_step_handle_prefix(cpu, analysis)
+}
+pub fn instr_F2_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    cpu.prefixes |= PREFIX_F2;
+    analyze_step_handle_prefix(cpu, analysis)
+}
+pub fn instr_F3_analyze(cpu: &mut CpuContext, analysis: &mut Analysis) {
+    cpu.prefixes |= PREFIX_F3;
+    analyze_step_handle_prefix(cpu, analysis)
+}
+
+pub fn modrm_analyze(ctx: &mut CpuContext, modrm_byte: u8) { ::modrm::skip(ctx, modrm_byte); }

+ 132 - 0
src/rust/c_api.rs

@@ -0,0 +1,132 @@
+use std::mem;
+use std::ptr::NonNull;
+
+use jit::{cached_code, JitState};
+use page::Page;
+use state_flags::CachedStateFlags;
+
+static mut MODULE_PTR: NonNull<JitState> =
+    unsafe { NonNull::new_unchecked(mem::align_of::<JitState>() as *mut _) };
+
+fn get_module<'a>() -> &'a mut JitState { unsafe { MODULE_PTR.as_mut() } }
+
+#[no_mangle]
+/// Called from JS, not C
+pub fn rust_setup() {
+    let x = Box::new(JitState::create_and_initialise());
+    unsafe {
+        MODULE_PTR = NonNull::new(Box::into_raw(x)).expect("assigning module ptr");
+    }
+
+    use std::panic;
+
+    panic::set_hook(Box::new(|panic_info| {
+        if let Some(location) = panic_info.location() {
+            dbg_log!(
+                "panic occurred in file '{}' at line {}",
+                location.file(),
+                location.line()
+            );
+        }
+        else {
+            dbg_log!("panic occurred but can't get location information...");
+        }
+    }));
+}
+
+#[no_mangle]
+pub fn jit_find_cache_entry(phys_address: u32, state_flags: u32) -> u32 {
+    let cached_code {
+        wasm_table_index,
+        initial_state,
+    } = ::jit::jit_find_cache_entry(phys_address, CachedStateFlags::of_u32(state_flags));
+    wasm_table_index as u32 | (initial_state as u32) << 16
+}
+
+#[no_mangle]
+/// Called from JS, not C
+pub fn codegen_finalize_finished(
+    wasm_table_index: u16,
+    phys_addr: u32,
+    end_addr: u32,
+    first_opcode: u32,
+    state_flags: u32,
+) {
+    ::jit::codegen_finalize_finished(
+        get_module(),
+        wasm_table_index,
+        phys_addr,
+        end_addr,
+        first_opcode,
+        CachedStateFlags::of_u32(state_flags),
+    )
+}
+
+#[no_mangle]
+pub fn jit_increase_hotness_and_maybe_compile(phys_address: u32, cs_offset: u32, state_flags: u32) {
+    ::jit::jit_increase_hotness_and_maybe_compile(
+        get_module(),
+        phys_address,
+        cs_offset,
+        CachedStateFlags::of_u32(state_flags),
+    )
+}
+
+#[no_mangle]
+#[cfg(debug_assertions)]
+pub fn jit_force_generate_unsafe(phys_addr: u32, cs_offset: u32, state_flags: u32) {
+    ::jit::jit_force_generate_unsafe(
+        get_module(),
+        phys_addr,
+        cs_offset,
+        CachedStateFlags::of_u32(state_flags),
+    )
+}
+
+#[no_mangle]
+pub fn jit_dirty_cache(start_addr: u32, end_addr: u32) {
+    ::jit::jit_dirty_cache(get_module(), start_addr, end_addr);
+}
+
+#[no_mangle]
+pub fn jit_dirty_cache_small(start_addr: u32, end_addr: u32) {
+    ::jit::jit_dirty_cache_small(get_module(), start_addr, end_addr);
+}
+
+#[no_mangle]
+pub fn jit_dirty_cache_single(addr: u32) { ::jit::jit_dirty_cache_single(get_module(), addr); }
+
+#[no_mangle]
+pub fn jit_page_has_code(page: u32) -> bool {
+    ::jit::jit_page_has_code(get_module(), Page::page_of(page << 12))
+}
+
+#[no_mangle]
+/// Called from JS, not C
+pub fn jit_empty_cache() { ::jit::jit_empty_cache(get_module()) }
+
+#[no_mangle]
+/// Called from JS, not C
+pub fn jit_get_op_ptr() -> *const u8 { ::jit::jit_get_op_ptr(get_module()) }
+
+#[no_mangle]
+/// Called from JS, not C
+pub fn jit_get_op_len() -> u32 { ::jit::jit_get_op_len(get_module()) }
+
+#[no_mangle]
+#[cfg(debug_assertions)]
+pub fn jit_unused_cache_stat() -> u32 { ::jit::jit_unused_cache_stat() }
+#[no_mangle]
+#[cfg(debug_assertions)]
+pub fn jit_get_entry_length(i: u32) -> u32 { ::jit::jit_get_entry_length(i) }
+#[no_mangle]
+#[cfg(debug_assertions)]
+pub fn jit_get_entry_address(i: u32) -> u32 { ::jit::jit_get_entry_address(i) }
+#[no_mangle]
+#[cfg(debug_assertions)]
+pub fn jit_get_entry_pending(i: u32) -> bool { ::jit::jit_get_entry_pending(i) }
+#[no_mangle]
+#[cfg(debug_assertions)]
+pub fn jit_get_wasm_table_index_free_list_count() -> u32 {
+    ::jit::jit_get_wasm_table_index_free_list_count(get_module())
+}

+ 382 - 0
src/rust/codegen.rs

@@ -0,0 +1,382 @@
+use global_pointers;
+use jit::JitContext;
+use jit::{GEN_LOCAL_SCRATCH0, GEN_LOCAL_SCRATCH1, GEN_LOCAL_SCRATCH2};
+use modrm;
+use tlb::{TLB_GLOBAL, TLB_NO_USER, TLB_READONLY, TLB_VALID};
+use wasmgen::module_init::WasmBuilder;
+use wasmgen::{module_init, wasm_util};
+
+// Emits wasm (into the function-prologue code_section):
+// previous_ip = instruction_pointer + n
+pub fn gen_set_previous_eip_offset_from_eip(builder: &mut WasmBuilder, n: u32) {
+    let cs = &mut builder.code_section;
+    wasm_util::push_i32(cs, global_pointers::PREVIOUS_IP as i32); // store address of previous ip
+    wasm_util::load_aligned_i32(cs, global_pointers::INSTRUCTION_POINTER); // load ip
+    if n != 0 {
+        // Skip the add when the offset is zero
+        wasm_util::push_i32(cs, n as i32);
+        wasm_util::add_i32(cs); // add constant to ip value
+    }
+    wasm_util::store_aligned_i32(cs); // store it as previous ip
+}
+
+// Emits wasm (code_section): instruction_pointer += n
+pub fn gen_increment_instruction_pointer(builder: &mut WasmBuilder, n: u32) {
+    let cs = &mut builder.code_section;
+    wasm_util::push_i32(cs, global_pointers::INSTRUCTION_POINTER as i32); // store address of ip
+
+    wasm_util::load_aligned_i32(cs, global_pointers::INSTRUCTION_POINTER); // load ip
+
+    wasm_util::push_i32(cs, n as i32);
+
+    wasm_util::add_i32(cs);
+    wasm_util::store_aligned_i32(cs); // store it back in
+}
+
+// Emits wasm (code_section): previous_ip = instruction_pointer
+pub fn gen_set_previous_eip(builder: &mut WasmBuilder) {
+    let cs = &mut builder.code_section;
+    wasm_util::push_i32(cs, global_pointers::PREVIOUS_IP as i32); // store address of previous ip
+    wasm_util::load_aligned_i32(cs, global_pointers::INSTRUCTION_POINTER); // load ip
+    wasm_util::store_aligned_i32(cs); // store it as previous ip
+}
+
+// Emits wasm (into instruction_body, i.e. the current instruction's code):
+// instruction_pointer += n
+pub fn gen_relative_jump(builder: &mut WasmBuilder, n: i32) {
+    // add n to instruction_pointer (without setting the offset as above)
+    let instruction_body = &mut builder.instruction_body;
+    wasm_util::push_i32(
+        instruction_body,
+        global_pointers::INSTRUCTION_POINTER as i32,
+    );
+    wasm_util::load_aligned_i32(instruction_body, global_pointers::INSTRUCTION_POINTER);
+    wasm_util::push_i32(instruction_body, n);
+    wasm_util::add_i32(instruction_body);
+    wasm_util::store_aligned_i32(instruction_body);
+}
+
+// Emits wasm (code_section): *variable_address += n
+pub fn gen_increment_variable(builder: &mut WasmBuilder, variable_address: u32, n: i32) {
+    wasm_util::increment_variable(&mut builder.code_section, variable_address, n);
+}
+
+pub fn gen_increment_timestamp_counter(builder: &mut WasmBuilder, n: i32) {
+    gen_increment_variable(builder, global_pointers::TIMESTAMP_COUNTER, n);
+}
+
+pub fn gen_increment_mem32(builder: &mut WasmBuilder, addr: u32) {
+    wasm_util::increment_mem32(&mut builder.code_section, addr)
+}
+
+// The gen_fn*_const helpers emit a call to an imported function `name` with
+// 0..3 constant i32 arguments. The gen_call_fn* variants expect the arguments
+// to already be on the wasm stack. The FN*_TYPE_INDEX constants select the
+// function signature for the import. Note the slight API inconsistency:
+// some helpers take JitContext, others take WasmBuilder directly.
+pub fn gen_fn0_const(ctx: &mut JitContext, name: &str) {
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN0_TYPE_INDEX);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+// Like gen_fn0_const, but for a function that returns a value (left on the stack)
+pub fn gen_fn0_const_ret(builder: &mut WasmBuilder, name: &str) {
+    let fn_idx = builder.get_fn_idx(name, module_init::FN0_RET_TYPE_INDEX);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_fn1_const(ctx: &mut JitContext, name: &str, arg0: u32) {
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
+    wasm_util::push_i32(&mut builder.instruction_body, arg0 as i32);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_call_fn1_ret(builder: &mut WasmBuilder, name: &str) {
+    // generates: fn( _ ) where _ must be left on the stack before calling this, and fn returns a value
+    let fn_idx = builder.get_fn_idx(name, module_init::FN1_RET_TYPE_INDEX);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_fn2_const(ctx: &mut JitContext, name: &str, arg0: u32, arg1: u32) {
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN2_TYPE_INDEX);
+    wasm_util::push_i32(&mut builder.instruction_body, arg0 as i32);
+    wasm_util::push_i32(&mut builder.instruction_body, arg1 as i32);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_call_fn2(builder: &mut WasmBuilder, name: &str) {
+    // generates: fn( _, _ ) where _ must be left on the stack before calling this
+    let fn_idx = builder.get_fn_idx(name, module_init::FN2_TYPE_INDEX);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_fn3_const(ctx: &mut JitContext, name: &str, arg0: u32, arg1: u32, arg2: u32) {
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN3_TYPE_INDEX);
+    wasm_util::push_i32(&mut builder.instruction_body, arg0 as i32);
+    wasm_util::push_i32(&mut builder.instruction_body, arg1 as i32);
+    wasm_util::push_i32(&mut builder.instruction_body, arg2 as i32);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+// The gen_modrm_fn* helpers call `name` with the modrm-resolved address
+// already on the stack (from gen_modrm_resolve), plus 0..2 constant args.
+pub fn gen_modrm_fn0(ctx: &mut JitContext, name: &str) {
+    // generates: fn( _ )
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_modrm_fn1(ctx: &mut JitContext, name: &str, arg0: u32) {
+    // generates: fn( _, arg0 )
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN2_TYPE_INDEX);
+    wasm_util::push_i32(&mut builder.instruction_body, arg0 as i32);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+pub fn gen_modrm_fn2(ctx: &mut JitContext, name: &str, arg0: u32, arg1: u32) {
+    // generates: fn( _, arg0, arg1 )
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN3_TYPE_INDEX);
+    wasm_util::push_i32(&mut builder.instruction_body, arg0 as i32);
+    wasm_util::push_i32(&mut builder.instruction_body, arg1 as i32);
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+}
+
+// Emit code that computes the modrm-addressed effective address onto the stack
+pub fn gen_modrm_resolve(ctx: &mut JitContext, modrm_byte: u8) { modrm::gen(ctx, modrm_byte) }
+
+pub fn gen_set_reg16_r(ctx: &mut JitContext, dest: u32, src: u32) {
+    // generates: reg16[r_dest] = reg16[r_src]
+    // (16-bit loads/stores on the low half of the 32-bit register slots)
+    let builder = &mut ctx.builder;
+    wasm_util::push_i32(
+        &mut builder.instruction_body,
+        global_pointers::get_reg16_offset(dest) as i32,
+    );
+    wasm_util::load_aligned_u16(
+        &mut builder.instruction_body,
+        global_pointers::get_reg16_offset(src),
+    );
+    wasm_util::store_aligned_u16(&mut builder.instruction_body);
+}
+pub fn gen_set_reg32_r(ctx: &mut JitContext, dest: u32, src: u32) {
+    // generates: reg32s[r_dest] = reg32s[r_src]
+    let builder = &mut ctx.builder;
+    wasm_util::push_i32(
+        &mut builder.instruction_body,
+        global_pointers::get_reg32_offset(dest) as i32,
+    );
+    wasm_util::load_aligned_i32(
+        &mut builder.instruction_body,
+        global_pointers::get_reg32_offset(src),
+    );
+    wasm_util::store_aligned_i32(&mut builder.instruction_body);
+}
+
+pub fn gen_set_reg16_fn0(ctx: &mut JitContext, name: &str, reg: u32) {
+    // generates: reg16[reg] = fn()
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN0_RET_TYPE_INDEX);
+    wasm_util::push_i32(
+        &mut builder.instruction_body,
+        global_pointers::get_reg16_offset(reg) as i32,
+    );
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+    wasm_util::store_aligned_u16(&mut builder.instruction_body);
+}
+
+pub fn gen_set_reg32s_fn0(ctx: &mut JitContext, name: &str, reg: u32) {
+    // generates: reg32s[reg] = fn()
+    let builder = &mut ctx.builder;
+    let fn_idx = builder.get_fn_idx(name, module_init::FN0_RET_TYPE_INDEX);
+    wasm_util::push_i32(
+        &mut builder.instruction_body,
+        global_pointers::get_reg32_offset(reg) as i32,
+    );
+    wasm_util::call_fn(&mut builder.instruction_body, fn_idx);
+    wasm_util::store_aligned_i32(&mut builder.instruction_body);
+}
+
+pub fn gen_safe_read32(ctx: &mut JitContext) {
+    // Assumes virtual address has been pushed to the stack, and generates safe_read32s' fast-path
+    // inline, bailing to safe_read32s_slow if necessary.
+    // Clobbers locals GEN_LOCAL_SCRATCH0 and GEN_LOCAL_SCRATCH1; leaves the
+    // loaded 32-bit value on the stack (both branches of the if produce i32).
+    let builder = &mut ctx.builder;
+    //let instruction_body = &mut ctx.builder.instruction_body;
+    //let cpu = &mut ctx.cpu;
+
+    let address_local = GEN_LOCAL_SCRATCH0;
+    wasm_util::tee_local(&mut builder.instruction_body, address_local);
+
+    // Pseudo: base_on_stack = (uint32_t)address >> 12;
+    wasm_util::push_i32(&mut builder.instruction_body, 12);
+    wasm_util::shr_u32(&mut builder.instruction_body);
+
+    // scale index (tlb_data entries are 4 bytes)
+    wasm_util::push_i32(&mut builder.instruction_body, 2);
+    wasm_util::shl_i32(&mut builder.instruction_body);
+
+    // Pseudo: entry = tlb_data[base_on_stack];
+    let entry_local = GEN_LOCAL_SCRATCH1;
+    wasm_util::load_aligned_i32_from_stack(
+        &mut builder.instruction_body,
+        global_pointers::TLB_DATA,
+    );
+    wasm_util::tee_local(&mut builder.instruction_body, entry_local);
+
+    // Pseudo: bool can_use_fast_path =
+    //     ((entry & 0xFFF & ~TLB_READONLY & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER)) == TLB_VALID &&
+    //      (address & 0xFFF) <= (0x1000 - 4));
+    // i.e. the TLB entry is valid, no disqualifying flag bits are set, and the
+    // 4-byte read does not cross a page boundary. cpl is a compile-time constant here.
+    wasm_util::push_i32(
+        &mut builder.instruction_body,
+        (0xFFF & !TLB_READONLY & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER }))
+            as i32,
+    );
+    wasm_util::and_i32(&mut builder.instruction_body);
+
+    wasm_util::push_i32(&mut builder.instruction_body, TLB_VALID as i32);
+    wasm_util::eq_i32(&mut builder.instruction_body);
+
+    wasm_util::get_local(&mut builder.instruction_body, address_local);
+    wasm_util::push_i32(&mut builder.instruction_body, 0xFFF);
+    wasm_util::and_i32(&mut builder.instruction_body);
+    wasm_util::push_i32(&mut builder.instruction_body, 0x1000 - 4);
+    wasm_util::le_i32(&mut builder.instruction_body);
+
+    wasm_util::and_i32(&mut builder.instruction_body);
+
+    // Pseudo:
+    // if(can_use_fast_path) leave_on_stack(mem8[entry & ~0xFFF ^ address]);
+    // (entry & ~0xFFF is the physical page base xor'd with the virtual address,
+    // yielding the physical address — the TLB stores page_phys ^ page_virt)
+    wasm_util::if_i32(&mut builder.instruction_body);
+    wasm_util::get_local(&mut builder.instruction_body, entry_local);
+    wasm_util::push_i32(&mut builder.instruction_body, !0xFFF);
+    wasm_util::and_i32(&mut builder.instruction_body);
+    wasm_util::get_local(&mut builder.instruction_body, address_local);
+    wasm_util::xor_i32(&mut builder.instruction_body);
+
+    wasm_util::load_unaligned_i32_from_stack(
+        &mut builder.instruction_body,
+        global_pointers::MEMORY,
+    );
+
+    // Pseudo:
+    // else { leave_on_stack(safe_read32s_slow(address)); }
+    wasm_util::else_(&mut builder.instruction_body);
+    wasm_util::get_local(&mut builder.instruction_body, address_local);
+    gen_call_fn1_ret(builder, "safe_read32s_slow");
+    wasm_util::block_end(&mut builder.instruction_body);
+}
+
+pub fn gen_safe_write32(ctx: &mut JitContext, local_for_address: u32, local_for_value: u32) {
+    // Generates safe_write32's fast-path inline, bailing to safe_write32_slow if necessary.
+
+    // local_for_{address,value} are the numbers of the local variables which contain the virtual
+    // address and value for safe_write32
+    // Usage:
+    // set_local(0, value);
+    // set_local(1, v_addr);
+    // gen_safe_write32(0, 1);
+
+    // Since this function clobbers other variables, we confirm that the caller uses the local
+    // variables we expect them to
+    assert!(local_for_address == GEN_LOCAL_SCRATCH0);
+    assert!(local_for_value == GEN_LOCAL_SCRATCH1);
+
+    let builder = &mut ctx.builder;
+    //let instruction_body = &mut ctx.builder.instruction_body;
+    //let cpu = &mut ctx.cpu;
+
+    wasm_util::get_local(&mut builder.instruction_body, local_for_address);
+
+    // Pseudo: base_on_stack = (uint32_t)address >> 12;
+    wasm_util::push_i32(&mut builder.instruction_body, 12);
+    wasm_util::shr_u32(&mut builder.instruction_body);
+
+    // scale index (tlb_data entries are 4 bytes)
+    wasm_util::push_i32(&mut builder.instruction_body, 2);
+    wasm_util::shl_i32(&mut builder.instruction_body);
+
+    // entry_local is only used in the following block, so the scratch variable can be reused later
+    {
+        // Pseudo: entry = tlb_data[base_on_stack];
+        let entry_local = GEN_LOCAL_SCRATCH2;
+        wasm_util::load_aligned_i32_from_stack(
+            &mut builder.instruction_body,
+            global_pointers::TLB_DATA,
+        );
+        wasm_util::tee_local(&mut builder.instruction_body, entry_local);
+
+        // Pseudo: bool can_use_fast_path =
+        //     ((entry & 0xFFF & ~TLB_GLOBAL & ~(cpl == 3 ? 0 : TLB_NO_USER)) == TLB_VALID &&
+        //      (address & 0xFFF) <= (0x1000 - 4));
+        // Unlike gen_safe_read32, TLB_READONLY is NOT masked out here: a
+        // read-only page must take the slow path for writes.
+        wasm_util::push_i32(
+            &mut builder.instruction_body,
+            (0xFFF & !TLB_GLOBAL & !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32,
+        );
+        wasm_util::and_i32(&mut builder.instruction_body);
+
+        wasm_util::push_i32(&mut builder.instruction_body, TLB_VALID as i32);
+        wasm_util::eq_i32(&mut builder.instruction_body);
+
+        wasm_util::get_local(&mut builder.instruction_body, local_for_address);
+        wasm_util::push_i32(&mut builder.instruction_body, 0xFFF);
+        wasm_util::and_i32(&mut builder.instruction_body);
+        wasm_util::push_i32(&mut builder.instruction_body, 0x1000 - 4);
+        wasm_util::le_i32(&mut builder.instruction_body);
+
+        wasm_util::and_i32(&mut builder.instruction_body);
+
+        // Pseudo:
+        // if(can_use_fast_path)
+        // {
+        //     phys_addr = entry & ~0xFFF ^ address;
+        wasm_util::if_void(&mut builder.instruction_body);
+
+        wasm_util::get_local(&mut builder.instruction_body, entry_local);
+        wasm_util::push_i32(&mut builder.instruction_body, !0xFFF);
+        wasm_util::and_i32(&mut builder.instruction_body);
+        wasm_util::get_local(&mut builder.instruction_body, local_for_address);
+        wasm_util::xor_i32(&mut builder.instruction_body);
+    }
+
+    // entry_local isn't needed anymore, so we overwrite it
+    let phys_addr_local = GEN_LOCAL_SCRATCH2;
+    // Pseudo:
+    //     /* continued within can_use_fast_path branch */
+    //     mem8[phys_addr] = value;
+
+    wasm_util::tee_local(&mut builder.instruction_body, phys_addr_local);
+    wasm_util::get_local(&mut builder.instruction_body, local_for_value);
+    wasm_util::store_unaligned_i32(&mut builder.instruction_body, global_pointers::MEMORY);
+
+    // Pseudo:
+    // else { safe_write32_slow(address, value); }
+    wasm_util::else_(&mut builder.instruction_body);
+    wasm_util::get_local(&mut builder.instruction_body, local_for_address);
+    wasm_util::get_local(&mut builder.instruction_body, local_for_value);
+    gen_call_fn2(builder, "safe_write32_slow");
+    wasm_util::block_end(&mut builder.instruction_body);
+}
+
+// generates: fn(reg16[r]) — loads the 16-bit register and calls `name` with it
+pub fn gen_fn1_reg16(ctx: &mut JitContext, name: &str, r: u32) {
+    let fn_idx = ctx.builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
+    wasm_util::load_aligned_u16(
+        &mut ctx.builder.instruction_body,
+        global_pointers::get_reg16_offset(r),
+    );
+    wasm_util::call_fn(&mut ctx.builder.instruction_body, fn_idx)
+}
+
+// generates: fn(reg32s[r])
+pub fn gen_fn1_reg32(ctx: &mut JitContext, name: &str, r: u32) {
+    let fn_idx = ctx.builder.get_fn_idx(name, module_init::FN1_TYPE_INDEX);
+    wasm_util::load_aligned_i32(
+        &mut ctx.builder.instruction_body,
+        global_pointers::get_reg32_offset(r),
+    );
+    wasm_util::call_fn(&mut ctx.builder.instruction_body, fn_idx)
+}
+
+// Emits: prefixes = 0
+pub fn gen_clear_prefixes(ctx: &mut JitContext) {
+    let instruction_body = &mut ctx.builder.instruction_body;
+    wasm_util::push_i32(instruction_body, global_pointers::PREFIXES as i32); // load address of prefixes
+    wasm_util::push_i32(instruction_body, 0);
+    wasm_util::store_aligned_i32(instruction_body);
+}
+
+// Emits: prefixes |= mask (mask must fit in the low prefix byte)
+pub fn gen_add_prefix_bits(ctx: &mut JitContext, mask: u32) {
+    assert!(mask < 0x100);
+
+    let instruction_body = &mut ctx.builder.instruction_body;
+    wasm_util::push_i32(instruction_body, global_pointers::PREFIXES as i32); // load address of prefixes
+
+    wasm_util::load_aligned_i32(instruction_body, global_pointers::PREFIXES); // load old value
+    wasm_util::push_i32(instruction_body, mask as i32);
+    wasm_util::or_i32(instruction_body);
+
+    wasm_util::store_aligned_i32(instruction_body);
+}

+ 50 - 0
src/rust/cpu.rs

@@ -0,0 +1,50 @@
+// TODO: Make this an instance, so we can plug in a fake cpu
+
+// Safe Rust wrappers around the extern "C" CPU interface still implemented on
+// the C/JS side (memory reads, TLB bookkeeping, and module finalization).
+
+use page::Page;
+use state_flags::CachedStateFlags;
+
+mod unsafe_cpu {
+    extern "C" {
+        pub fn tlb_set_has_code(physical_page: u32, has_code: bool);
+        pub fn read8(addr: u32) -> u8;
+        pub fn read16(addr: u32) -> u16;
+        pub fn read32(addr: u32) -> u32;
+        pub fn check_tlb_invariants();
+
+        // Hands a finished wasm module to the host for instantiation;
+        // state_flags is passed as its raw u32 representation across the FFI
+        pub fn codegen_finalize(
+            wasm_table_index: u16,
+            phys_addr: u32,
+            end_addr: u32,
+            first_opcode: u32,
+            state_flags: u32,
+        );
+    }
+}
+
+// Physical-memory reads (addr is a physical address)
+pub fn read8(addr: u32) -> u8 { unsafe { unsafe_cpu::read8(addr) } }
+pub fn read16(addr: u32) -> u16 { unsafe { unsafe_cpu::read16(addr) } }
+pub fn read32(addr: u32) -> u32 { unsafe { unsafe_cpu::read32(addr) } }
+
+pub fn tlb_set_has_code(physical_page: Page, has_code: bool) {
+    unsafe { unsafe_cpu::tlb_set_has_code(physical_page.to_u32(), has_code) }
+}
+
+pub fn check_tlb_invariants() { unsafe { unsafe_cpu::check_tlb_invariants() } }
+
+pub fn codegen_finalize(
+    wasm_table_index: u16,
+    phys_addr: u32,
+    end_addr: u32,
+    first_opcode: u32,
+    state_flags: CachedStateFlags,
+) {
+    unsafe {
+        unsafe_cpu::codegen_finalize(
+            wasm_table_index,
+            phys_addr,
+            end_addr,
+            first_opcode,
+            state_flags.to_u32(),
+        )
+    }
+}

+ 74 - 0
src/rust/cpu_context.rs

@@ -0,0 +1,74 @@
+use cpu;
+use prefix::{PREFIX_MASK_ADDRSIZE, PREFIX_MASK_OPSIZE};
+use state_flags::CachedStateFlags;
+
+#[derive(Clone)]
+/// Decoder-time view of the CPU: the current fetch pointer (eip is an
+/// absolute/physical fetch address here), accumulated instruction prefixes,
+/// the code-segment base, and the cached state flags of the compilation.
+pub struct CpuContext {
+    pub eip: u32,
+    pub prefixes: u32,
+    pub cs_offset: u32,
+    pub state_flags: CachedStateFlags,
+}
+
+impl CpuContext {
+    // The advance* methods skip over immediates without reading them. The
+    // asserts guarantee the skipped bytes do not cross a 4k page boundary.
+    pub fn advance8(&mut self) {
+        assert!(self.eip & 0xFFF < 0xFFF);
+        self.eip += 1;
+    }
+    pub fn advance16(&mut self) {
+        assert!(self.eip & 0xFFF < 0xFFE);
+        self.eip += 2;
+    }
+    pub fn advance32(&mut self) {
+        assert!(self.eip & 0xFFF < 0xFFC);
+        self.eip += 4;
+    }
+    #[allow(unused)]
+    // Skip a moffs operand: 32-bit wide with a 32-bit address size, else 16-bit
+    pub fn advance_moffs(&mut self) {
+        if self.asize_32() {
+            self.advance32()
+        }
+        else {
+            self.advance16()
+        }
+    }
+
+    // The read_imm* methods fetch an immediate at eip and advance past it
+    pub fn read_imm8(&mut self) -> u8 {
+        assert!(self.eip & 0xFFF < 0xFFF);
+        let v = cpu::read8(self.eip);
+        self.eip += 1;
+        v
+    }
+    pub fn read_imm8s(&mut self) -> i8 { self.read_imm8() as i8 }
+    pub fn read_imm16(&mut self) -> u16 {
+        assert!(self.eip & 0xFFF < 0xFFE);
+        let v = cpu::read16(self.eip);
+        self.eip += 2;
+        v
+    }
+    pub fn read_imm16s(&mut self) -> i16 { self.read_imm16() as i16 }
+    pub fn read_imm32(&mut self) -> u32 {
+        assert!(self.eip & 0xFFF < 0xFFC);
+        let v = cpu::read32(self.eip);
+        self.eip += 4;
+        v
+    }
+    pub fn read_moffs(&mut self) -> u32 {
+        if self.asize_32() {
+            self.read_imm32()
+        }
+        else {
+            self.read_imm16() as u32
+        }
+    }
+
+    pub fn cpl3(&self) -> bool { self.state_flags.cpl3() }
+    pub fn has_flat_segmentation(&self) -> bool { self.state_flags.has_flat_segmentation() }
+    // Effective operand/address size: the segment default (is_32) is toggled
+    // by the presence of the respective size-override prefix
+    pub fn osize_32(&self) -> bool {
+        self.state_flags.is_32() != (self.prefixes & PREFIX_MASK_OPSIZE != 0)
+    }
+    pub fn asize_32(&self) -> bool {
+        self.state_flags.is_32() != (self.prefixes & PREFIX_MASK_ADDRSIZE != 0)
+    }
+    pub fn ssize_32(&self) -> bool { self.state_flags.ssize_32() }
+}

+ 12 - 8
src/rust/dbg.rs

@@ -19,19 +19,23 @@ macro_rules! dbg_assert {
 #[allow(unused_macros)]
 macro_rules! dbg_log {
     ($fmt:expr) => {
-        use ::util::{ DEBUG, _log_to_js_console };
-        if DEBUG { _log_to_js_console($fmt); }
+        {
+            use ::util::{ DEBUG, _log_to_js_console };
+            if DEBUG { _log_to_js_console($fmt); }
+        }
     };
     ($fmt:expr, $($arg:tt)*) => {
-        use ::util::{ DEBUG, _log_to_js_console };
-        if DEBUG { _log_to_js_console(format!($fmt, $($arg)*)); }
+        {
+            use ::util::{ DEBUG, _log_to_js_console };
+            if DEBUG { _log_to_js_console(format!($fmt, $($arg)*)); }
+        }
     };
 }
 
 #[cfg(target_arch = "wasm32")]
 #[allow(unused_macros)]
 macro_rules! dbg_assert {
-    ($cond:expr) => {
+    ($cond:expr) => {{
         use util::{_log_to_js_console, abort, DEBUG};
         if DEBUG && !$cond {
             _log_to_js_console(format!(
@@ -45,8 +49,8 @@ macro_rules! dbg_assert {
                 abort();
             }
         }
-    };
-    ($cond:expr, $desc:expr) => {
+    }};
+    ($cond:expr, $desc:expr) => {{
         use util::{_log_to_js_console, abort, DEBUG};
         if DEBUG && !$cond {
             _log_to_js_console(format!(
@@ -61,5 +65,5 @@ macro_rules! dbg_assert {
                 abort();
             }
         }
-    };
+    }};
 }

+ 7 - 0
src/rust/gen/mod.rs

@@ -0,0 +1,7 @@
+// Machine-generated opcode dispatch tables, produced by
+// gen/generate_jit.js and gen/generate_analyzer.js — do not edit by hand.
+pub mod jit;
+pub mod jit0f_16;
+pub mod jit0f_32;
+
+pub mod analyzer;
+pub mod analyzer0f_16;
+pub mod analyzer0f_32;

+ 18 - 0
src/rust/global_pointers.rs

@@ -0,0 +1,18 @@
+// Byte offsets into the wasm linear memory shared with the C side.
+// NOTE(review): these must stay in sync with the C-side memory layout
+// (global_pointers.h) — confirm when either side changes.
+pub const REG: u32 = 4;
+pub const INSTRUCTION_POINTER: u32 = 556;
+pub const PREVIOUS_IP: u32 = 560;
+pub const PREFIXES: u32 = 648;
+pub const TIMESTAMP_COUNTER: u32 = 664;
+pub const OPSTATS_BUFFER: u32 = 0x1000;
+pub const OPSTATS_BUFFER_0F: u32 = 0x1400;
+pub const TLB_DATA: u32 = 0x400000;
+pub const MEMORY: u32 = 0x800000;
+
+// Registers are stored as eight 32-bit slots; the 16-bit view is the low half
+// of each slot (little-endian), so both offsets are REG + 4*r.
+pub fn get_reg16_offset(r: u32) -> u32 {
+    assert!(r < 8);
+    REG + 4 * r
+}
+pub fn get_reg32_offset(r: u32) -> u32 {
+    assert!(r < 8);
+    REG + 4 * r
+}

+ 1360 - 0
src/rust/jit.rs

@@ -0,0 +1,1360 @@
+use std::collections::{HashMap, HashSet};
+
+use analysis::AnalysisType;
+use codegen;
+use cpu;
+use cpu_context::CpuContext;
+use jit_instructions;
+use page::Page;
+use profiler;
+use profiler::stat;
+use state_flags::CachedStateFlags;
+use util::SafeToU16;
+use wasmgen::module_init::WasmBuilder;
+use wasmgen::{module_init, wasm_util};
+
+// Number of slots in the indirect-call table holding compiled functions
+pub const WASM_TABLE_SIZE: u32 = 0x10000;
+
+// Size of the hot_code_addresses hash table (prime, used as modulus)
+pub const HASH_PRIME: u32 = 6151;
+
+// Expensive consistency checks; see jit_cache_array::check_invariants
+pub const CHECK_JIT_CACHE_ARRAY_INVARIANTS: bool = false;
+// NOTE(review): "OPTIMZATION" is misspelled, but renaming would touch all users
+pub const ENABLE_JIT_NONFAULTING_OPTIMZATION: bool = true;
+
+pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 10000;
+
+pub const JIT_ALWAYS_USE_LOOP_SAFETY: bool = false;
+
+// Hotness count at which a page is compiled
+pub const JIT_THRESHOLD: u32 = 2500;
+
+// Imported helper names for the 16 x86 condition codes, indexed by cc nibble
+const CONDITION_FUNCTIONS: [&str; 16] = [
+    "test_o", "test_no", "test_b", "test_nb", "test_z", "test_nz", "test_be", "test_nbe", "test_s",
+    "test_ns", "test_p", "test_np", "test_l", "test_nl", "test_le", "test_nle",
+];
+
+const CODE_CACHE_SEARCH_SIZE: u32 = 8;
+const MAX_INSTRUCTION_LENGTH: u32 = 16;
+
+// Global cache of compiled-code entries. Entries belonging to the same
+// physical page are chained through an intrusive singly-linked list
+// (next_index_same_page), with the list head per page in page_first_entry.
+mod jit_cache_array {
+    use page::Page;
+    use state_flags::CachedStateFlags;
+
+    // Note: For performance reasons, this is global state. See jit_find_cache_entry
+
+    // Sentinel index encoding Option::None in the intrusive lists
+    const NO_NEXT_ENTRY: u32 = 0xffff_ffff;
+
+    pub const SIZE: u32 = 0x40000;
+    pub const MASK: u32 = (SIZE - 1);
+
+    #[derive(Copy, Clone)]
+    pub struct Entry {
+        pub start_addr: u32,
+
+        #[cfg(debug_assertions)]
+        pub len: u32,
+
+        #[cfg(debug_assertions)]
+        pub opcode: u32,
+
+        // an index into jit_cache_array for the next code_cache entry within the same physical page
+        next_index_same_page: u32,
+
+        pub initial_state: u16,
+        pub wasm_table_index: u16,
+        pub state_flags: CachedStateFlags,
+        pub pending: bool,
+    }
+
+    impl Entry {
+        pub fn create(
+            start_addr: u32,
+            next_index_same_page: Option<u32>,
+            wasm_table_index: u16,
+            initial_state: u16,
+            state_flags: CachedStateFlags,
+            pending: bool,
+        ) -> Entry {
+            let next_index_same_page = next_index_same_page.unwrap_or(NO_NEXT_ENTRY);
+            Entry {
+                start_addr,
+                next_index_same_page,
+                wasm_table_index,
+                initial_state,
+                state_flags,
+                pending,
+
+                #[cfg(debug_assertions)]
+                len: 0,
+
+                #[cfg(debug_assertions)]
+                opcode: 0,
+            }
+        }
+        // Option view over the NO_NEXT_ENTRY sentinel
+        pub fn next_index_same_page(&self) -> Option<u32> {
+            if self.next_index_same_page == NO_NEXT_ENTRY {
+                None
+            }
+            else {
+                Some(self.next_index_same_page)
+            }
+        }
+
+        pub fn set_next_index_same_page(&mut self, next_index: Option<u32>) {
+            if let Some(i) = next_index {
+                self.next_index_same_page = i
+            }
+            else {
+                self.next_index_same_page = NO_NEXT_ENTRY
+            }
+        }
+    }
+
+    const DEFAULT_ENTRY: Entry = Entry {
+        start_addr: 0,
+        next_index_same_page: NO_NEXT_ENTRY,
+        wasm_table_index: 0,
+        initial_state: 0,
+        state_flags: CachedStateFlags::EMPTY,
+        pending: false,
+
+        #[cfg(debug_assertions)]
+        len: 0,
+        #[cfg(debug_assertions)]
+        opcode: 0,
+    };
+
+    // NOTE(review): the static initializer differs from DEFAULT_ENTRY
+    // (next_index_same_page is 0, not NO_NEXT_ENTRY); the real initial state
+    // is established by clear(), invoked via jit_empty_cache at startup.
+    #[allow(non_upper_case_globals)]
+    static mut jit_cache_array: [Entry; SIZE as usize] = [Entry {
+        start_addr: 0,
+        next_index_same_page: 0,
+        wasm_table_index: 0,
+        initial_state: 0,
+        state_flags: CachedStateFlags::EMPTY,
+        pending: false,
+
+        #[cfg(debug_assertions)]
+        len: 0,
+        #[cfg(debug_assertions)]
+        opcode: 0,
+    }; SIZE as usize];
+
+    // Per physical page (2^20 pages), the index of the first cache entry in it
+    #[allow(non_upper_case_globals)]
+    static mut page_first_entry: [u32; 0x100000] = [0; 0x100000];
+
+    pub fn get_page_index(page: Page) -> Option<u32> {
+        let index = unsafe { page_first_entry[page.to_u32() as usize] };
+        if index == NO_NEXT_ENTRY {
+            None
+        }
+        else {
+            Some(index)
+        }
+    }
+
+    pub fn set_page_index(page: Page, index: Option<u32>) {
+        let index = index.unwrap_or(NO_NEXT_ENTRY);
+        unsafe { page_first_entry[page.to_u32() as usize] = index }
+    }
+
+    pub fn get(i: u32) -> &'static Entry { unsafe { &jit_cache_array[i as usize] } }
+    pub fn get_mut(i: u32) -> &'static mut Entry { unsafe { &mut jit_cache_array[i as usize] } }
+
+    // No bounds check; caller must guarantee i < SIZE
+    pub fn get_unchecked(i: u32) -> &'static Entry {
+        unsafe { jit_cache_array.get_unchecked(i as usize) }
+    }
+
+    fn set(i: u32, entry: Entry) { unsafe { jit_cache_array[i as usize] = entry }; }
+
+    // Store `entry` at `index` and prepend it to its page's linked list
+    pub fn insert(index: u32, mut entry: Entry) {
+        let page = Page::page_of(entry.start_addr);
+
+        let previous_entry_index = get_page_index(page);
+
+        if let Some(previous_entry_index) = previous_entry_index {
+            let previous_entry = get(previous_entry_index);
+
+            if previous_entry.start_addr != 0 {
+                dbg_assert!(
+                    Page::page_of(previous_entry.start_addr) == Page::page_of(entry.start_addr)
+                );
+            }
+        }
+
+        set_page_index(page, Some(index));
+        entry.set_next_index_same_page(previous_entry_index);
+
+        set(index, entry);
+    }
+
+    // Unlink the entry at `index` from its page's singly-linked list
+    // (the entry itself is not erased, only detached)
+    pub fn remove(index: u32) {
+        let page = Page::page_of(get(index).start_addr);
+
+        let mut page_index = get_page_index(page);
+        let mut did_remove = false;
+
+        if page_index == Some(index) {
+            set_page_index(page, get(index).next_index_same_page());
+            did_remove = true;
+        }
+        else {
+            while let Some(page_index_ok) = page_index {
+                let next_index = get(page_index_ok).next_index_same_page();
+                if next_index == Some(index) {
+                    get_mut(page_index_ok)
+                        .set_next_index_same_page(get(index).next_index_same_page());
+                    did_remove = true;
+                    break;
+                }
+                page_index = next_index;
+            }
+        }
+
+        get_mut(index).set_next_index_same_page(None);
+
+        dbg_assert!(did_remove);
+    }
+
+    pub fn iter() -> ::std::slice::Iter<'static, Entry> { unsafe { jit_cache_array.iter() } }
+
+    // Reset both the entry array and the per-page list heads
+    pub fn clear() {
+        unsafe {
+            for (i, _) in jit_cache_array.iter().enumerate() {
+                jit_cache_array[i] = DEFAULT_ENTRY;
+            }
+
+            for (i, _) in page_first_entry.iter().enumerate() {
+                page_first_entry[i] = NO_NEXT_ENTRY;
+            }
+        }
+    }
+
+    // Debug-only full consistency check; gated off by default via
+    // CHECK_JIT_CACHE_ARRAY_INVARIANTS as it walks every page and entry
+    pub fn check_invariants() {
+        if !::jit::CHECK_JIT_CACHE_ARRAY_INVARIANTS {
+            return;
+        }
+
+        // there are no loops in the linked lists
+        // https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_Tortoise_and_Hare
+        for i in 0..(1 << 20) {
+            let mut slow = get_page_index(Page::page_of(i << 12));
+            let mut fast = slow;
+
+            while let Some(fast_ok) = fast {
+                fast = get(fast_ok).next_index_same_page();
+                slow = get(slow.unwrap()).next_index_same_page();
+
+                if let Some(fast_ok) = fast {
+                    fast = get(fast_ok).next_index_same_page();
+                }
+                else {
+                    break;
+                }
+
+                dbg_assert!(slow != fast);
+            }
+        }
+
+        let mut wasm_table_index_to_jit_cache_index = [0; ::jit::WASM_TABLE_SIZE as usize];
+
+        for (i, entry) in iter().enumerate() {
+            dbg_assert!(entry.next_index_same_page().map_or(true, |i| i < SIZE));
+
+            if entry.pending {
+                dbg_assert!(entry.start_addr != 0);
+                dbg_assert!(entry.wasm_table_index != 0);
+            }
+            else {
+                // an invalid entry has both its start_addr and wasm_table_index set to 0
+                // neither start_addr nor wasm_table_index are 0 for any valid entry
+
+                dbg_assert!((entry.start_addr == 0) == (entry.wasm_table_index == 0));
+            }
+
+            // having a next entry implies validity
+            dbg_assert!(entry.next_index_same_page() == None || entry.start_addr != 0);
+
+            // any valid wasm_table_index can only be used within a single page
+            if entry.wasm_table_index != 0 {
+                let j = wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize];
+
+                if j != 0 {
+                    let other_entry = get(j);
+                    dbg_assert!(other_entry.wasm_table_index == entry.wasm_table_index);
+                    dbg_assert!(
+                        Page::page_of(other_entry.start_addr) == Page::page_of(entry.start_addr)
+                    );
+                }
+                else {
+                    wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize] = i as u32;
+                }
+            }
+
+            if entry.start_addr != 0 {
+                // valid entries can be reached from page_first_entry
+                let mut reached = false;
+
+                let page = Page::page_of(entry.start_addr);
+                let mut cache_array_index = get_page_index(page);
+
+                while let Some(index) = cache_array_index {
+                    let other_entry = get(index);
+
+                    if i as u32 == index {
+                        reached = true;
+                        break;
+                    }
+
+                    cache_array_index = other_entry.next_index_same_page();
+                }
+
+                dbg_assert!(reached);
+            }
+        }
+    }
+}
+
+/// Mutable state of the JIT compiler, threaded through all jit_* functions
+/// instead of being kept in globals.
+pub struct JitState {
+    // as an alternative to HashSet, we could use a bitmap of 4096 bits here
+    // (faster, but uses much more memory)
+    // or a compressed bitmap (likely faster)
+    // Per-hash-slot execution counters (see jit_hot_hash_page); a page is
+    // compiled once its slot reaches JIT_THRESHOLD
+    hot_code_addresses: [u32; HASH_PRIME as usize],
+    // wasm table indices available for newly compiled modules
+    wasm_table_index_free_list: Vec<u16>,
+    // indices whose entries were removed while still pending; freed in
+    // codegen_finalize_finished once compilation completes
+    wasm_table_index_pending_free: Vec<u16>,
+    // per page: offsets within the page that were recorded as entry points
+    entry_points: HashMap<Page, HashSet<u16>>,
+    // builder reused across compilations to produce wasm modules
+    wasm_builder: WasmBuilder,
+}
+
+impl JitState {
+    /// Construct a fresh `JitState`: an initialised wasm builder, empty
+    /// free/pending lists and entry points, and a cleared code cache.
+    pub fn create_and_initialise() -> JitState {
+        let mut state = JitState {
+            hot_code_addresses: [0; HASH_PRIME as usize],
+            wasm_table_index_free_list: Vec::new(),
+            wasm_table_index_pending_free: Vec::new(),
+            entry_points: HashMap::new(),
+            wasm_builder: WasmBuilder::new(),
+        };
+        // initialise the builder before the cache is set up, as the original
+        // construction order did
+        state.wasm_builder.init();
+        jit_empty_cache(&mut state);
+        state
+    }
+}
+
+/// How a basic block ends, and therefore how control continues after it.
+#[derive(PartialEq, Eq)]
+enum BasicBlockType {
+    // falls through (or jumps unconditionally) to a single successor block
+    Normal {
+        next_block_addr: u32,
+    },
+    // ends in a conditional jump with two possible successors
+    ConditionalJump {
+        // successor when the branch is not taken (the next instruction)
+        next_block_addr: u32,
+        // successor when taken; None if the target lies on another page
+        next_block_branch_taken_addr: Option<u32>,
+        // condition code index into CONDITION_FUNCTIONS (asserted < 16)
+        condition: u8,
+        // relative jump distance as decoded from the instruction
+        jump_offset: i32,
+        // whether the jump offset is 32-bit (vs 16-bit, wrapped at 0xFFFF)
+        jump_offset_is_32: bool,
+    },
+    // execution leaves the compiled page/module here
+    Exit,
+}
+
+/// A run of instructions with a single entry at `addr` and a single exit at
+/// `end_addr` (exclusive), produced by jit_find_basic_blocks.
+struct BasicBlock {
+    addr: u32,
+    end_addr: u32,
+    // true if this block is a recorded entry point and gets a cache entry
+    is_entry_block: bool,
+    ty: BasicBlockType,
+}
+
+/// Result of a code-cache lookup: which wasm table entry to call and which
+/// state value to pass as its first argument. #[repr(C)] since it crosses
+/// the FFI boundary to the caller of jit_find_cache_entry.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct cached_code {
+    pub wasm_table_index: u16,
+    pub initial_state: u16,
+}
+
+impl cached_code {
+    // sentinel for "no cached code found"; index 0 is never a valid wasm
+    // table index (see the dbg_assert in jit_analyze_and_generate)
+    const NONE: cached_code = cached_code {
+        wasm_table_index: 0,
+        initial_state: 0,
+    };
+}
+
+pub struct JitContext<'a> {
+    pub cpu: &'a mut CpuContext,
+    pub builder: &'a mut WasmBuilder,
+}
+
+// Wasm local-variable indices used by generated functions.
+// Index 0 is the function's single argument: the initial state.
+pub const GEN_LOCAL_ARG_INITIAL_STATE: u32 = 0;
+// current basic-block index; drives the br_table dispatch loop
+pub const GEN_LOCAL_STATE: u32 = 1;
+// countdown of remaining loop iterations before the function returns
+pub const GEN_LOCAL_ITERATION_COUNTER: u32 = 2;
+// local scratch variables for use wherever required
+pub const GEN_LOCAL_SCRATCH0: u32 = 3;
+pub const GEN_LOCAL_SCRATCH1: u32 = 4;
+pub const GEN_LOCAL_SCRATCH2: u32 = 5;
+// Function arguments are not included in the local variable count
+pub const GEN_NO_OF_LOCALS: u32 = 5;
+
+// Bitflags reported by jit_instruction through instruction_flags:
+// the instruction ends a basic block (jump or other boundary)
+pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0;
+// the instruction cannot fault, so previous_ip updates may be batched
+pub const JIT_INSTR_NONFAULTING_FLAG: u32 = 1 << 1;
+
+// expected function index of the imported "get_seg" helper; asserted
+// against the builder's assignment in jit_generate_module
+pub const FN_GET_SEG_IDX: u16 = 0;
+
+/// Map a page to its slot in the hot-code counter table.
+fn jit_hot_hash_page(page: Page) -> u32 {
+    let page_index = page.to_u32();
+    page_index % HASH_PRIME
+}
+
+/// True when fewer than MAX_INSTRUCTION_LENGTH bytes remain before the end
+/// of the 4K page, i.e. an instruction starting here could cross the page
+/// boundary. (In Rust, `&` binds tighter than `>=`.)
+fn is_near_end_of_page(address: u32) -> bool {
+    let offset_in_page = address & 0xFFF;
+    offset_in_page >= 0x1000 - MAX_INSTRUCTION_LENGTH
+}
+
+/// Look up compiled code for `phys_address` in the jit cache.
+///
+/// Probes up to CODE_CACHE_SEARCH_SIZE consecutive slots and returns the
+/// wasm table index and initial state of the first non-pending entry that
+/// matches both the address and `state_flags`, or `cached_code::NONE`.
+pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) -> cached_code {
+    for i in 0..CODE_CACHE_SEARCH_SIZE {
+        let index = (phys_address + i) & jit_cache_array::MASK;
+        let entry = jit_cache_array::get_unchecked(index);
+
+        #[cfg(debug_assertions)]
+        {
+            // record *why* a matching address could not be used
+            if entry.start_addr == phys_address {
+                if entry.pending {
+                    profiler::stat_increment(stat::S_RUN_INTERPRETED_PENDING)
+                }
+                if entry.state_flags != state_flags {
+                    profiler::stat_increment(stat::S_RUN_INTERPRETED_DIFFERENT_STATE)
+                }
+            }
+
+            if is_near_end_of_page(phys_address) {
+                // entries are never created near the end of a page
+                // (see record_entry_point)
+                assert!(entry.start_addr != phys_address);
+                profiler::stat_increment(stat::S_RUN_INTERPRETED_NEAR_END_OF_PAGE);
+            }
+        }
+
+        if !entry.pending && entry.start_addr == phys_address && entry.state_flags == state_flags {
+            #[cfg(debug_assertions)]
+            {
+                // the memory this entry was compiled from must be unchanged
+                assert!(cpu::read32(entry.start_addr) == entry.opcode)
+            }
+            return cached_code {
+                wasm_table_index: entry.wasm_table_index,
+                initial_state: entry.initial_state,
+            };
+        }
+    }
+
+    cached_code::NONE
+}
+
+/// Remember `phys_address` as a potential compilation entry point for its
+/// page. Addresses near the end of a page are ignored. The first time a page
+/// gains an entry point, the tlb is told that the page contains code.
+fn record_entry_point(ctx: &mut JitState, phys_address: u32) {
+    if is_near_end_of_page(phys_address) {
+        return;
+    }
+    let page = Page::page_of(phys_address);
+    let offset_in_page = phys_address as u16 & 0xFFF;
+    let newly_seen_page = !ctx.entry_points.contains_key(&page);
+    ctx.entry_points
+        .entry(page)
+        .or_insert_with(HashSet::new)
+        .insert(offset_in_page);
+
+    if newly_seen_page {
+        cpu::tlb_set_has_code(page, true);
+    }
+}
+
+/// Discover the basic blocks of a page by abstract interpretation.
+///
+/// Starting from the page's recorded entry points, instructions are decoded
+/// with `::analysis::analyze_step` and grouped into blocks. In-page jump
+/// targets are pushed onto a worklist and followed; out-of-page targets end
+/// the block. Returns the blocks sorted by address plus a flag that is set
+/// when a block jumps back to its own start, in which case the generated
+/// code needs a loop iteration limit.
+fn jit_find_basic_blocks(
+    page: Page,
+    entry_points: &HashSet<u16>,
+    cpu: CpuContext,
+) -> (Vec<BasicBlock>, bool) {
+    // worklist of in-page offsets still to be analysed
+    let mut to_visit_stack: Vec<u16> = entry_points.iter().cloned().collect();
+    let mut marked_as_entry: HashSet<u16> = entry_points.clone();
+    let page_high_bits = page.to_address();
+    let mut basic_blocks: HashMap<u32, BasicBlock> = HashMap::new();
+    let mut requires_loop_limit = false;
+
+    while let Some(to_visit_offset) = to_visit_stack.pop() {
+        let to_visit = to_visit_offset as u32 | page_high_bits;
+        if basic_blocks.contains_key(&to_visit) {
+            // already analysed from another path
+            continue;
+        }
+        let mut current_address = to_visit;
+        let mut current_block = BasicBlock {
+            addr: current_address,
+            end_addr: 0,
+            ty: BasicBlockType::Exit,
+            is_entry_block: false,
+        };
+        loop {
+            if is_near_end_of_page(current_address) {
+                // TODO: Don't insert this block if empty
+                current_block.end_addr = current_address;
+                profiler::stat_increment(stat::S_COMPILE_CUT_OFF_AT_END_OF_PAGE);
+                break;
+            }
+            let mut ctx = &mut CpuContext {
+                eip: current_address,
+                ..cpu
+            };
+            let analysis = ::analysis::analyze_step(&mut ctx);
+            let has_next_instruction = !analysis.no_next_instruction;
+            // analyze_step advances eip past the decoded instruction
+            current_address = ctx.eip;
+
+            match analysis.ty {
+                AnalysisType::Normal => {
+                    dbg_assert!(has_next_instruction);
+
+                    // if we ran into the start of an existing block, end this
+                    // one here and fall through to it
+                    if basic_blocks.contains_key(&current_address) {
+                        current_block.end_addr = current_address;
+                        current_block.ty = BasicBlockType::Normal {
+                            next_block_addr: current_address,
+                        };
+                    }
+                },
+                AnalysisType::Jump {
+                    offset,
+                    is_32,
+                    condition,
+                } => {
+                    let jump_target = if is_32 {
+                        current_address.wrapping_add(offset as u32)
+                    }
+                    else {
+                        // 16-bit jumps wrap within the segment's 64K window
+                        ctx.cs_offset.wrapping_add(
+                            (current_address
+                                .wrapping_sub(ctx.cs_offset)
+                                .wrapping_add(offset as u32)) & 0xFFFF,
+                        )
+                    };
+
+                    if let Some(condition) = condition {
+                        // conditional jump: continue at next and continue at jump target
+
+                        dbg_assert!(has_next_instruction);
+                        to_visit_stack.push(current_address as u16 & 0xFFF);
+
+                        let next_block_branch_taken_addr;
+
+                        if Page::page_of(jump_target) == page {
+                            to_visit_stack.push(jump_target as u16 & 0xFFF);
+
+                            next_block_branch_taken_addr = Some(jump_target);
+
+                            // Very simple heuristic for "infinite loops": This
+                            // detects Linux's "calibrating delay loop"
+                            if jump_target == current_block.addr {
+                                dbg_log!("Basic block looping back to front");
+                                requires_loop_limit = true;
+                            }
+                        }
+                        else {
+                            next_block_branch_taken_addr = None;
+                        }
+
+                        current_block.ty = BasicBlockType::ConditionalJump {
+                            next_block_addr: current_address,
+                            next_block_branch_taken_addr,
+                            condition,
+                            jump_offset: offset,
+                            jump_offset_is_32: is_32,
+                        };
+
+                        current_block.end_addr = current_address;
+
+                        break;
+                    }
+                    else {
+                        // non-conditional jump: continue at jump target
+
+                        if has_next_instruction {
+                            // Execution will eventually come back to the next instruction (CALL)
+                            marked_as_entry.insert(current_address as u16 & 0xFFF);
+                            to_visit_stack.push(current_address as u16 & 0xFFF);
+                        }
+
+                        if Page::page_of(jump_target) == page {
+                            current_block.ty = BasicBlockType::Normal {
+                                next_block_addr: jump_target,
+                            };
+                            to_visit_stack.push(jump_target as u16 & 0xFFF);
+                        }
+                        else {
+                            current_block.ty = BasicBlockType::Exit;
+                        }
+
+                        current_block.end_addr = current_address;
+
+                        break;
+                    }
+                },
+                AnalysisType::BlockBoundary => {
+                    // a block boundary but not a jump, get out
+
+                    if has_next_instruction {
+                        // block boundary, but execution will eventually come back
+                        // to the next instruction. Create a new basic block
+                        // starting at the next instruction and register it as an
+                        // entry point
+                        marked_as_entry.insert(current_address as u16 & 0xFFF);
+                        to_visit_stack.push(current_address as u16 & 0xFFF);
+                    }
+
+                    current_block.end_addr = current_address;
+                    break;
+                },
+            }
+        }
+
+        basic_blocks.insert(to_visit, current_block);
+    }
+
+    for block in basic_blocks.values_mut() {
+        if marked_as_entry.contains(&(block.addr as u16 & 0xFFF)) {
+            block.is_entry_block = true;
+        }
+    }
+
+    let mut basic_blocks: Vec<BasicBlock> =
+        basic_blocks.into_iter().map(|(_, block)| block).collect();
+
+    basic_blocks.sort_by_key(|block| block.addr);
+
+    // split any block that overlaps the start of the following block, so
+    // block ranges are disjoint
+    for i in 0..basic_blocks.len() - 1 {
+        let next_block_addr = basic_blocks[i + 1].addr;
+        let block = &mut basic_blocks[i];
+        if next_block_addr < block.end_addr {
+            block.ty = BasicBlockType::Normal { next_block_addr };
+            block.end_addr = next_block_addr;
+
+            // TODO: assert that the old type is equal to the type of the following block?
+        }
+    }
+
+    (basic_blocks, requires_loop_limit)
+}
+
+/// Insert a newly created entry into the jit cache array.
+///
+/// Probes CODE_CACHE_SEARCH_SIZE slots starting at the entry's address for a
+/// free one. If none is free the first slot is evicted: either just cleared
+/// (when it belongs to the same wasm module as the new entry), or removed
+/// together with all sibling entries of its wasm table index.
+fn create_cache_entry(ctx: &mut JitState, entry: jit_cache_array::Entry) {
+    let mut found_entry_index = None;
+    let phys_addr = entry.start_addr;
+
+    for i in 0..CODE_CACHE_SEARCH_SIZE {
+        let addr_index = (phys_addr + i) & jit_cache_array::MASK;
+        let entry = jit_cache_array::get(addr_index);
+
+        if entry.start_addr == 0 {
+            // free slot (start_addr == 0 marks an invalid entry)
+            found_entry_index = Some(addr_index);
+            break;
+        }
+    }
+
+    let found_entry_index = match found_entry_index {
+        Some(i) => i,
+        None => {
+            profiler::stat_increment(stat::S_CACHE_MISMATCH);
+
+            // no free slots, overwrite the first one
+            let found_entry_index = phys_addr & jit_cache_array::MASK;
+
+            let old_entry = jit_cache_array::get_mut(found_entry_index);
+
+            // if we're here, we expect to overwrite a valid index
+            dbg_assert!(old_entry.start_addr != 0);
+            dbg_assert!(old_entry.wasm_table_index != 0);
+
+            if old_entry.wasm_table_index == entry.wasm_table_index {
+                dbg_assert!(old_entry.pending);
+                dbg_assert!(Page::page_of(old_entry.start_addr) == Page::page_of(phys_addr));
+
+                // The old entry belongs to the same wasm table index as this entry.
+                // *Don't* free the wasm table index, instead just delete the old entry
+                // and use its slot for this entry.
+                // TODO: Optimally, we should pick another slot instead of dropping
+                // an entry that has just been created.
+                //let old_page = Page::page_of(old_entry.start_addr);
+                jit_cache_array::remove(found_entry_index);
+
+                dbg_assert!(old_entry.next_index_same_page() == None);
+                old_entry.pending = false;
+                old_entry.start_addr = 0;
+            }
+            else {
+                let old_wasm_table_index = old_entry.wasm_table_index;
+                let old_page = Page::page_of(old_entry.start_addr);
+
+                remove_jit_cache_wasm_index(ctx, old_page, old_wasm_table_index);
+
+                //jit_cache_array::check_invariants();
+
+                // old entry should be removed after calling remove_jit_cache_wasm_index
+
+                dbg_assert!(!old_entry.pending);
+                dbg_assert!(old_entry.start_addr == 0);
+                dbg_assert!(old_entry.wasm_table_index == 0);
+                dbg_assert!(old_entry.next_index_same_page() == None);
+            }
+
+            found_entry_index
+        },
+    };
+
+    jit_cache_array::insert(found_entry_index, entry);
+}
+
+/// Debug-build helper: compile the page containing `phys_addr` immediately,
+/// bypassing the hotness counter in jit_increase_hotness_and_maybe_compile.
+#[cfg(debug_assertions)]
+pub fn jit_force_generate_unsafe(
+    ctx: &mut JitState,
+    phys_addr: u32,
+    cs_offset: u32,
+    state_flags: CachedStateFlags,
+) {
+    record_entry_point(ctx, phys_addr);
+    jit_analyze_and_generate(ctx, Page::page_of(phys_addr), cs_offset, state_flags);
+}
+
+/// Compile all code reachable from the recorded entry points of `page` into
+/// one wasm module and register cache entries for its entry blocks.
+///
+/// Entries are created in pending state; they become usable only after
+/// codegen_finalize_finished is called back asynchronously once the module
+/// has been instantiated. Does nothing if the page has no entry points.
+fn jit_analyze_and_generate(
+    ctx: &mut JitState,
+    page: Page,
+    cs_offset: u32,
+    state_flags: CachedStateFlags,
+) {
+    profiler::stat_increment(stat::S_COMPILE);
+
+    // take the page's entry points; they are consumed by this compilation
+    let entry_points = ctx.entry_points.remove(&page);
+    let cpu = CpuContext {
+        eip: 0,
+        prefixes: 0,
+        cs_offset,
+        state_flags,
+    };
+
+    if let Some(entry_points) = entry_points {
+        let (mut basic_blocks, requires_loop_limit) =
+            jit_find_basic_blocks(page, &entry_points, cpu.clone());
+
+        //for b in basic_blocks.iter() {
+        //    dbg_log!(
+        //        "> Basic block from {:x} to {:x}, is_entry={}",
+        //        b.addr,
+        //        b.end_addr,
+        //        b.is_entry_block
+        //    );
+        //}
+
+        jit_generate_module(
+            &basic_blocks,
+            requires_loop_limit,
+            cpu.clone(),
+            &mut ctx.wasm_builder,
+        );
+
+        // allocate an index in the wasm table
+        let wasm_table_index = ctx
+            .wasm_table_index_free_list
+            .pop()
+            .expect("allocate wasm table index");
+        dbg_assert!(wasm_table_index != 0);
+
+        // create entries for each basic block that is marked as an entry point
+        let mut entry_point_count = 0;
+
+        for (i, block) in basic_blocks.iter().enumerate() {
+            profiler::stat_increment(stat::S_COMPILE_BASIC_BLOCK);
+
+            if block.is_entry_block && block.addr != block.end_addr {
+                dbg_assert!(block.addr != 0);
+
+                // the block's position doubles as the state value the
+                // generated dispatch loop starts from
+                let initial_state = i.safe_to_u16();
+
+                let mut entry = jit_cache_array::Entry::create(
+                    block.addr,
+                    None, // to be filled in by create_cache_entry
+                    wasm_table_index,
+                    initial_state,
+                    state_flags,
+                    true,
+                );
+
+                #[cfg(debug_assertions)]
+                {
+                    entry.len = block.end_addr - block.addr;
+                    entry.opcode = cpu::read32(block.addr);
+                }
+
+                create_cache_entry(ctx, entry);
+
+                entry_point_count += 1;
+                profiler::stat_increment(stat::S_COMPILE_ENTRY_POINT);
+            }
+        }
+
+        dbg_assert!(entry_point_count > 0);
+
+        cpu::tlb_set_has_code(page, true);
+
+        jit_cache_array::check_invariants();
+        cpu::check_tlb_invariants();
+
+        let end_addr = 0;
+        let first_opcode = 0;
+        let phys_addr = page.to_address();
+
+        // will call codegen_finalize_finished asynchronously when finished
+        cpu::codegen_finalize(
+            wasm_table_index,
+            phys_addr,
+            end_addr,
+            first_opcode,
+            state_flags,
+        );
+
+        profiler::stat_increment(stat::S_COMPILE_SUCCESS);
+    }
+    else {
+        //dbg_log("No basic blocks, not generating code");
+        // Nothing to do
+    }
+}
+
+/// Callback invoked once the wasm module for `wasm_table_index` has been
+/// compiled and linked into the wasm table.
+///
+/// If the index was scheduled for freeing while compilation was in flight
+/// (see remove_jit_cache_wasm_index), it is freed now. Otherwise all cache
+/// entries of the page that belong to this module are flipped from pending
+/// to usable.
+pub fn codegen_finalize_finished(
+    ctx: &mut JitState,
+    wasm_table_index: u16,
+    phys_addr: u32,
+    _end_addr: u32,
+    _first_opcode: u32,
+    _state_flags: CachedStateFlags,
+) {
+    assert!(wasm_table_index != 0);
+
+    match ctx
+        .wasm_table_index_pending_free
+        .iter()
+        .position(|i| *i == wasm_table_index)
+    {
+        Some(i) => {
+            // entries were already removed; just release the index
+            ctx.wasm_table_index_pending_free.swap_remove(i);
+            free_wasm_table_index(ctx, wasm_table_index);
+        },
+        None => {
+            // walk the page's entry chain and clear the pending bit on
+            // entries belonging to this module
+            let page = Page::page_of(phys_addr);
+            let mut cache_array_index = jit_cache_array::get_page_index(page);
+
+            while let Some(index) = cache_array_index {
+                let mut entry = jit_cache_array::get_mut(index);
+
+                if entry.wasm_table_index == wasm_table_index {
+                    dbg_assert!(entry.pending);
+                    entry.pending = false;
+                }
+
+                cache_array_index = entry.next_index_same_page();
+            }
+        },
+    }
+
+    jit_cache_array::check_invariants();
+
+    if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
+        // sanity check that the above iteration marked all entries as not pending
+
+        for entry in jit_cache_array::iter() {
+            if entry.wasm_table_index == wasm_table_index {
+                assert!(!entry.pending);
+            }
+        }
+    }
+}
+
+/// Generate one wasm function covering the given basic blocks.
+///
+/// The function is a state machine: a loop wrapping a br_table that
+/// dispatches on the state local (one case per basic block, plus a default
+/// that traps). After each block's code, control either returns (Exit or a
+/// broken connection to the next block), re-enters the loop with the
+/// successor's index, or conditionally selects between two successor states.
+fn jit_generate_module(
+    basic_blocks: &Vec<BasicBlock>,
+    requires_loop_limit: bool,
+    mut cpu: CpuContext,
+    builder: &mut WasmBuilder,
+) {
+    builder.reset();
+
+    // get_seg must end up at the index FN_GET_SEG_IDX that generated code uses
+    let fn_get_seg_idx = builder.get_fn_idx("get_seg", module_init::FN1_RET_TYPE_INDEX);
+    dbg_assert!(fn_get_seg_idx == FN_GET_SEG_IDX);
+
+    // block address -> position in basic_blocks (= state value)
+    let basic_block_indices: HashMap<u32, u32> = basic_blocks
+        .iter()
+        .enumerate()
+        .map(|(index, block)| (block.addr, index as u32))
+        .collect();
+
+    // set state local variable to the initial state passed as the first argument
+    wasm_util::get_local(&mut builder.instruction_body, GEN_LOCAL_ARG_INITIAL_STATE);
+    wasm_util::set_local(&mut builder.instruction_body, GEN_LOCAL_STATE);
+
+    // initialise max_iterations
+    // TODO: Remove if not requires_loop_limit
+    wasm_util::push_i32(
+        &mut builder.instruction_body,
+        JIT_MAX_ITERATIONS_PER_FUNCTION as i32,
+    );
+    wasm_util::set_local(&mut builder.instruction_body, GEN_LOCAL_ITERATION_COUNTER);
+
+    // main state machine loop
+    wasm_util::loop_void(&mut builder.instruction_body);
+
+    if JIT_ALWAYS_USE_LOOP_SAFETY || requires_loop_limit {
+        profiler::stat_increment(stat::S_COMPILE_WITH_LOOP_SAFETY);
+
+        // decrement max_iterations
+        wasm_util::get_local(&mut builder.instruction_body, GEN_LOCAL_ITERATION_COUNTER);
+        wasm_util::push_i32(&mut builder.instruction_body, -1);
+        wasm_util::add_i32(&mut builder.instruction_body);
+        wasm_util::set_local(&mut builder.instruction_body, GEN_LOCAL_ITERATION_COUNTER);
+
+        // if max_iterations == 0: return
+        wasm_util::get_local(&mut builder.instruction_body, GEN_LOCAL_ITERATION_COUNTER);
+        wasm_util::eqz_i32(&mut builder.instruction_body);
+        wasm_util::if_void(&mut builder.instruction_body);
+        wasm_util::return_(&mut builder.instruction_body);
+        wasm_util::block_end(&mut builder.instruction_body);
+    }
+
+    wasm_util::block_void(&mut builder.instruction_body); // for the default case
+
+    // generate the opening blocks for the cases
+
+    for _ in 0..basic_blocks.len() {
+        wasm_util::block_void(&mut builder.instruction_body);
+    }
+
+    wasm_util::get_local(&mut builder.instruction_body, GEN_LOCAL_STATE);
+    wasm_util::brtable_and_cases(&mut builder.instruction_body, basic_blocks.len() as u32);
+
+    for (i, block) in basic_blocks.iter().enumerate() {
+        // Case [i] will jump after the [i]th block, so we first generate the
+        // block end opcode and then the code for that block
+        wasm_util::block_end(&mut builder.instruction_body);
+
+        if block.addr == block.end_addr {
+            // Empty basic block, generate no code (for example, jump to block
+            // that is near end of page)
+            dbg_assert!(block.ty == BasicBlockType::Exit);
+        }
+        else {
+            builder.commit_instruction_body_to_cs();
+            jit_generate_basic_block(&mut cpu, builder, block.addr, block.end_addr);
+            builder.commit_instruction_body_to_cs();
+        }
+
+        // the block ended early (see overlap warning in
+        // jit_generate_basic_block) if eip didn't land on end_addr
+        let invalid_connection_to_next_block = block.end_addr != cpu.eip;
+
+        match (&block.ty, invalid_connection_to_next_block) {
+            (_, true) | (BasicBlockType::Exit, _) => {
+                // Exit this function
+                wasm_util::return_(&mut builder.instruction_body);
+            },
+            (BasicBlockType::Normal { next_block_addr }, _) => {
+                // Unconditional jump to next basic block
+                // - All instructions that don't change eip
+                // - Unconditional jump
+
+                let next_bb_index = *basic_block_indices.get(&next_block_addr).expect("");
+                //dbg_assert!(next_bb_index != -1);
+
+                // set state variable to next basic block
+                wasm_util::push_i32(&mut builder.instruction_body, next_bb_index as i32);
+                wasm_util::set_local(&mut builder.instruction_body, GEN_LOCAL_STATE);
+
+                wasm_util::br(
+                    &mut builder.instruction_body,
+                    basic_blocks.len() as u32 - i as u32,
+                ); // to the loop
+            },
+            (
+                &BasicBlockType::ConditionalJump {
+                    next_block_addr,
+                    next_block_branch_taken_addr,
+                    condition,
+                    jump_offset,
+                    jump_offset_is_32,
+                },
+                _,
+            ) => {
+                // Conditional jump to next basic block
+                // - jnz, jc, etc.
+
+                dbg_assert!(condition < 16);
+                let condition = CONDITION_FUNCTIONS[condition as usize];
+
+                codegen::gen_fn0_const_ret(builder, condition);
+
+                wasm_util::if_void(&mut builder.instruction_body);
+
+                // Branch taken
+
+                if jump_offset_is_32 {
+                    codegen::gen_relative_jump(builder, jump_offset);
+                }
+                else {
+                    // TODO: Is this necessary?
+                    let ctx = &mut JitContext {
+                        cpu: &mut cpu.clone(),
+                        builder,
+                    };
+                    codegen::gen_fn1_const(ctx, "jmp_rel16", jump_offset as u32);
+                }
+
+                if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr {
+                    let next_basic_block_branch_taken_index = *basic_block_indices
+                        .get(&next_block_branch_taken_addr)
+                        .expect("");
+
+                    wasm_util::push_i32(
+                        &mut builder.instruction_body,
+                        next_basic_block_branch_taken_index as i32,
+                    );
+                    wasm_util::set_local(&mut builder.instruction_body, GEN_LOCAL_STATE);
+                }
+                else {
+                    // Jump to different page
+                    wasm_util::return_(&mut builder.instruction_body);
+                }
+
+                wasm_util::else_(&mut builder.instruction_body);
+
+                {
+                    // Branch not taken
+                    // TODO: Could use fall-through here
+                    let next_basic_block_index =
+                        *basic_block_indices.get(&next_block_addr).expect("");
+
+                    wasm_util::push_i32(
+                        &mut builder.instruction_body,
+                        next_basic_block_index as i32,
+                    );
+                    wasm_util::set_local(&mut builder.instruction_body, GEN_LOCAL_STATE);
+                }
+
+                wasm_util::block_end(&mut builder.instruction_body);
+
+                wasm_util::br(
+                    &mut builder.instruction_body,
+                    basic_blocks.len() as u32 - i as u32,
+                ); // to the loop
+            },
+        }
+    }
+
+    wasm_util::block_end(&mut builder.instruction_body); // default case
+    wasm_util::unreachable(&mut builder.instruction_body);
+
+    wasm_util::block_end(&mut builder.instruction_body); // loop
+
+    builder.commit_instruction_body_to_cs();
+    builder.finish(GEN_NO_OF_LOCALS as u8);
+}
+
+/// Emit code for a single basic block, from `start_addr` up to `stop_addr`.
+///
+/// Besides the per-instruction code (via jit_instructions::jit_instruction),
+/// this maintains previous_ip/instruction_pointer bookkeeping. With
+/// ENABLE_JIT_NONFAULTING_OPTIMZATION set, pointer updates are batched
+/// across non-faulting instructions through `eip_delta`. Finally emits a
+/// timestamp-counter increment for the number of instructions generated.
+fn jit_generate_basic_block(
+    mut cpu: &mut CpuContext,
+    builder: &mut WasmBuilder,
+    start_addr: u32,
+    stop_addr: u32,
+) {
+    // number of instructions emitted, used for the timestamp counter
+    let mut len = 0;
+
+    let mut end_addr;
+    let mut was_block_boundary;
+    // bytes of instruction length not yet applied to instruction_pointer
+    let mut eip_delta = 0;
+
+    //*instruction_pointer = start_addr;
+
+    // First iteration of do-while assumes the caller confirms this condition
+    dbg_assert!(!is_near_end_of_page(start_addr));
+
+    cpu.eip = start_addr;
+
+    loop {
+        if false {
+            ::opstats::gen_opstats(builder, cpu::read32(cpu.eip));
+        }
+        let start_eip = cpu.eip;
+        let mut instruction_flags = 0;
+        jit_instructions::jit_instruction(&mut cpu, builder, &mut instruction_flags);
+        // jit_instruction advances cpu.eip past the instruction it decoded
+        let end_eip = cpu.eip;
+
+        let instruction_length = end_eip - start_eip;
+        was_block_boundary = instruction_flags & JIT_INSTR_BLOCK_BOUNDARY_FLAG != 0;
+
+        dbg_assert!(instruction_length < MAX_INSTRUCTION_LENGTH);
+
+        if ENABLE_JIT_NONFAULTING_OPTIMZATION {
+            // There are a few conditions to keep in mind to optimize the update of previous_ip and
+            // instruction_pointer:
+            // - previous_ip MUST be updated just before a faulting instruction
+            // - instruction_pointer MUST be updated before jump instructions (since they use the EIP
+            //   value for instruction logic)
+            // - Nonfaulting instructions don't need either to be updated
+            if was_block_boundary {
+                // prev_ip = eip + eip_delta, so that previous_ip points to the start of this instruction
+                codegen::gen_set_previous_eip_offset_from_eip(builder, eip_delta);
+
+                // eip += eip_delta + len(jump) so instruction logic uses the correct eip
+                codegen::gen_increment_instruction_pointer(builder, eip_delta + instruction_length);
+                builder.commit_instruction_body_to_cs();
+
+                eip_delta = 0;
+            }
+            else if instruction_flags & JIT_INSTR_NONFAULTING_FLAG == 0 {
+                // Faulting instruction
+
+                // prev_ip = eip + eip_delta, so that previous_ip points to the start of this instruction
+                codegen::gen_set_previous_eip_offset_from_eip(builder, eip_delta);
+                builder.commit_instruction_body_to_cs();
+
+                // Leave this instruction's length to be updated in the next batch, whatever it may be
+                eip_delta += instruction_length;
+            }
+            else {
+                // Non-faulting, so we skip setting previous_ip and simply queue the instruction length
+                // for whenever eip is updated next
+                profiler::stat_increment(stat::S_NONFAULTING_OPTIMIZATION);
+                eip_delta += instruction_length;
+            }
+        }
+        else {
+            codegen::gen_set_previous_eip(builder);
+            codegen::gen_increment_instruction_pointer(builder, instruction_length);
+            builder.commit_instruction_body_to_cs();
+        }
+        end_addr = cpu.eip;
+        len += 1;
+
+        if end_addr == stop_addr {
+            break;
+        }
+
+        if was_block_boundary || is_near_end_of_page(end_addr) || end_addr > stop_addr {
+            dbg_log!("Overlapping basic blocks start={:x} expected_end={:x} end={:x} was_block_boundary={} near_end_of_page={}",
+                     start_addr, stop_addr, end_addr, was_block_boundary, is_near_end_of_page(end_addr));
+            break;
+        }
+    }
+
+    if ENABLE_JIT_NONFAULTING_OPTIMZATION {
+        // When the block ends in a non-jump instruction, we may have uncommitted updates still
+        if eip_delta > 0 {
+            builder.commit_instruction_body_to_cs();
+            codegen::gen_increment_instruction_pointer(builder, eip_delta);
+        }
+    }
+
+    codegen::gen_increment_timestamp_counter(builder, len);
+
+    // no page was crossed
+    dbg_assert!(Page::page_of(end_addr) == Page::page_of(start_addr));
+}
+
+/// Record `phys_address` as an entry point and bump its page's hotness
+/// counter; once the counter reaches JIT_THRESHOLD, reset it and compile
+/// the page.
+pub fn jit_increase_hotness_and_maybe_compile(
+    ctx: &mut JitState,
+    phys_address: u32,
+    cs_offset: u32,
+    state_flags: CachedStateFlags,
+) {
+    record_entry_point(ctx, phys_address);
+    let page = Page::page_of(phys_address);
+    let slot = jit_hot_hash_page(page) as usize;
+    let new_count = ctx.hot_code_addresses[slot] + 1;
+    if new_count >= JIT_THRESHOLD {
+        // hot enough: reset the counter and compile
+        ctx.hot_code_addresses[slot] = 0;
+        jit_analyze_and_generate(ctx, page, cs_offset, state_flags)
+    }
+    else {
+        ctx.hot_code_addresses[slot] = new_count;
+    }
+}
+
+/// Return a wasm table index to the free list so a later compilation can
+/// reuse its slot.
+fn free_wasm_table_index(ctx: &mut JitState, wasm_table_index: u16) {
+    if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
+        // freeing an index twice would hand the same slot to two modules
+        dbg_assert!(!ctx.wasm_table_index_free_list.contains(&wasm_table_index));
+    }
+    ctx.wasm_table_index_free_list.push(wasm_table_index);
+
+    // It is not strictly necessary to clear the function, but it will fail
+    // more predictably if we accidentally use the function
+    // XXX: This fails in Chromium:
+    //   RangeError: WebAssembly.Table.set(): Modifying existing entry in table not supported.
+    //jit_clear_func(wasm_table_index);
+}
+
+/// Remove all entries with the given wasm_table_index in page
+///
+/// The table index itself is freed, unless any of the removed entries were
+/// still pending (module not yet instantiated) — then the free is deferred
+/// to codegen_finalize_finished via wasm_table_index_pending_free. Clears
+/// the page's has-code flag once the page holds no more compiled code.
+fn remove_jit_cache_wasm_index(ctx: &mut JitState, page: Page, wasm_table_index: u16) {
+    // caller guarantees the page has at least one entry
+    let mut cache_array_index = jit_cache_array::get_page_index(page).unwrap();
+
+    let mut pending = false;
+
+    loop {
+        let entry = jit_cache_array::get_mut(cache_array_index);
+        // read the link before remove() unlinks the entry from its chain
+        let next_cache_array_index = entry.next_index_same_page();
+
+        if entry.wasm_table_index == wasm_table_index {
+            // if one entry is pending, all must be pending
+            dbg_assert!(!pending || entry.pending);
+
+            pending = entry.pending;
+
+            jit_cache_array::remove(cache_array_index);
+
+            dbg_assert!(entry.next_index_same_page() == None);
+            entry.wasm_table_index = 0;
+            entry.start_addr = 0;
+            entry.pending = false;
+        }
+
+        if let Some(i) = next_cache_array_index {
+            cache_array_index = i;
+        }
+        else {
+            break;
+        }
+    }
+
+    if pending {
+        ctx.wasm_table_index_pending_free.push(wasm_table_index);
+    }
+    else {
+        free_wasm_table_index(ctx, wasm_table_index);
+    }
+
+    if !jit_page_has_code(ctx, page) {
+        cpu::tlb_set_has_code(page, false);
+    }
+
+    if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
+        // sanity check that the above iteration deleted all entries
+
+        for entry in jit_cache_array::iter() {
+            dbg_assert!(entry.wasm_table_index != wasm_table_index);
+        }
+    }
+}
+
+/// Register a write in this page: Delete all present code
+pub fn jit_dirty_page(ctx: &mut JitState, page: Page) {
+    let mut did_have_code = false;
+
+    if let Some(mut cache_array_index) = jit_cache_array::get_page_index(page) {
+        did_have_code = true;
+
+        // Several entries can share one wasm table index, so collect the
+        // indices in sets and free each exactly once afterwards
+        let mut index_to_free = HashSet::new();
+        let mut index_to_pending_free = HashSet::new();
+
+        // Unlink the whole chain from the page before clearing its entries
+        jit_cache_array::set_page_index(page, None);
+        profiler::stat_increment(stat::S_INVALIDATE_PAGE);
+
+        loop {
+            profiler::stat_increment(stat::S_INVALIDATE_CACHE_ENTRY);
+            let entry = jit_cache_array::get_mut(cache_array_index);
+            let wasm_table_index = entry.wasm_table_index;
+
+            // Every entry on the chain must actually belong to this page
+            assert!(page == Page::page_of(entry.start_addr));
+
+            let next_cache_array_index = entry.next_index_same_page();
+
+            // Reset the entry so it reads as unused
+            entry.set_next_index_same_page(None);
+            entry.start_addr = 0;
+            entry.wasm_table_index = 0;
+
+            if entry.pending {
+                entry.pending = false;
+
+                // In-flight compilation: the free must wait for it to finish
+                index_to_pending_free.insert(wasm_table_index);
+            }
+            else {
+                index_to_free.insert(wasm_table_index);
+            }
+
+            if let Some(i) = next_cache_array_index {
+                cache_array_index = i;
+            }
+            else {
+                break;
+            }
+        }
+
+        for index in index_to_free.iter().cloned() {
+            free_wasm_table_index(ctx, index)
+        }
+
+        for index in index_to_pending_free {
+            ctx.wasm_table_index_pending_free.push(index);
+        }
+    }
+
+    match ctx.entry_points.remove(&page) {
+        None => {},
+        Some(_entry_points) => {
+            did_have_code = true;
+
+            // don't try to compile code in this page anymore until it's hot again
+            ctx.hot_code_addresses[jit_hot_hash_page(page) as usize] = 0;
+        },
+    }
+
+    if did_have_code {
+        // Writes to this page no longer need to be tracked by the TLB
+        cpu::tlb_set_has_code(page, false);
+    }
+}
+
+/// Invalidate all compiled code in every page overlapping the address range
+/// `start_addr..end_addr` (end exclusive; the range must be non-empty).
+pub fn jit_dirty_cache(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
+    assert!(start_addr < end_addr);
+
+    let start_page = Page::page_of(start_addr);
+    let end_page = Page::page_of(end_addr - 1);
+
+    // Iterate inclusively up to end_page, hence the + 1
+    for page in start_page.to_u32()..end_page.to_u32() + 1 {
+        jit_dirty_page(ctx, Page::page_of(page << 12));
+    }
+}
+
+/// Fast path of `jit_dirty_cache` for small writes that touch at most two
+/// adjacent pages.
+pub fn jit_dirty_cache_small(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
+    assert!(start_addr < end_addr);
+
+    let start_page = Page::page_of(start_addr);
+    let end_page = Page::page_of(end_addr - 1);
+
+    jit_dirty_page(ctx, start_page);
+
+    // Note: This can't happen when paging is enabled, as writes across
+    //       boundaries are split up on two pages
+    if start_page != end_page {
+        assert!(start_page.to_u32() + 1 == end_page.to_u32());
+        jit_dirty_page(ctx, end_page);
+    }
+}
+
+/// Invalidate the compiled code on the single page containing `addr`.
+pub fn jit_dirty_cache_single(ctx: &mut JitState, addr: u32) {
+    jit_dirty_page(ctx, Page::page_of(addr));
+}
+
+/// Drop every compiled block and recorded entry point, and rebuild the wasm
+/// table free list from scratch.
+pub fn jit_empty_cache(ctx: &mut JitState) {
+    jit_cache_array::clear();
+
+    ctx.entry_points.clear();
+
+    ctx.wasm_table_index_pending_free.clear();
+    ctx.wasm_table_index_free_list.clear();
+
+    // Refill the free list with the indices 1..=0xFFFF; index 0 is reserved
+    for i in 0..0xFFFF {
+        // don't assign 0 (XXX: Check)
+        ctx.wasm_table_index_free_list.push(i as u16 + 1);
+    }
+}
+
+/// Whether any compiled code or recorded entry point exists for `page`.
+pub fn jit_page_has_code(ctx: &JitState, page: Page) -> bool {
+    (jit_cache_array::get_page_index(page) != None || ctx.entry_points.contains_key(&page))
+}
+
+// Debug-only introspection helpers, available for tests and instrumentation
+
+// Number of cache entries currently unused (start_addr == 0 marks a free slot)
+#[cfg(debug_assertions)]
+pub fn jit_unused_cache_stat() -> u32 {
+    jit_cache_array::iter()
+        .filter(|e| e.start_addr == 0)
+        .count() as u32
+}
+#[cfg(debug_assertions)]
+pub fn jit_get_entry_length(i: u32) -> u32 { jit_cache_array::get(i).len }
+#[cfg(debug_assertions)]
+pub fn jit_get_entry_address(i: u32) -> u32 { jit_cache_array::get(i).start_addr }
+#[cfg(debug_assertions)]
+pub fn jit_get_entry_pending(i: u32) -> bool { jit_cache_array::get(i).pending }
+#[cfg(debug_assertions)]
+pub fn jit_get_wasm_table_index_free_list_count(ctx: &JitState) -> u32 {
+    ctx.wasm_table_index_free_list.len() as u32
+}
+
+// Accessors for the output buffer of the wasm module under construction
+pub fn jit_get_op_len(ctx: &JitState) -> u32 { ctx.wasm_builder.get_op_len() }
+pub fn jit_get_op_ptr(ctx: &JitState) -> *const u8 { ctx.wasm_builder.get_op_ptr() }

+ 396 - 0
src/rust/jit_instructions.rs

@@ -0,0 +1,396 @@
+#![allow(non_snake_case)]
+
+use codegen;
+use cpu_context::CpuContext;
+use global_pointers;
+use jit::JitContext;
+use jit::{GEN_LOCAL_SCRATCH0, GEN_LOCAL_SCRATCH1};
+use prefix::SEG_PREFIX_ZERO;
+use prefix::{PREFIX_66, PREFIX_67, PREFIX_F2, PREFIX_F3};
+use regs::{AX, BP, BX, CX, DI, DX, SI, SP};
+use regs::{CS, DS, ES, FS, GS, SS};
+use regs::{EAX, EBP, EBX, ECX, EDI, EDX, ESI, ESP};
+use wasmgen::module_init::WasmBuilder;
+use wasmgen::wasm_util;
+
+/// Decode the next instruction from `cpu` and generate code for it into
+/// `builder`. The dispatch index combines the opcode byte with the
+/// operand-size bit (osize_32) in bit 8.
+pub fn jit_instruction(cpu: &mut CpuContext, builder: &mut WasmBuilder, instr_flags: &mut u32) {
+    cpu.prefixes = 0;
+    let ctx = &mut JitContext { cpu, builder };
+    ::gen::jit::jit(
+        ctx.cpu.read_imm8() as u32 | (ctx.cpu.osize_32() as u32) << 8,
+        ctx,
+        instr_flags,
+    );
+}
+
+/// Generate code for the instruction following a prefix byte: publish the
+/// accumulated prefix bits, dispatch the next opcode, then clear the
+/// prefixes again.
+pub fn jit_handle_prefix(ctx: &mut JitContext, instr_flags: &mut u32) {
+    // TODO: Could avoid repeatedly generating prefix updates
+    let prefixes = ctx.cpu.prefixes;
+    codegen::gen_add_prefix_bits(ctx, prefixes);
+    ::gen::jit::jit(
+        ctx.cpu.read_imm8() as u32 | (ctx.cpu.osize_32() as u32) << 8,
+        ctx,
+        instr_flags,
+    );
+    codegen::gen_clear_prefixes(ctx);
+}
+
+/// Record a segment-override prefix (segment in 0..=5, stored as segment + 1
+/// so that 0 can mean "no override") and continue with the next opcode.
+pub fn jit_handle_segment_prefix(segment: u32, ctx: &mut JitContext, instr_flags: &mut u32) {
+    assert!(segment <= 5);
+    ctx.cpu.prefixes |= segment + 1;
+    jit_handle_prefix(ctx, instr_flags)
+}
+
+// 0F escape byte: dispatch into the two-byte opcode tables
+pub fn instr16_0F_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    ::gen::jit0f_16::jit(ctx.cpu.read_imm8(), ctx, instr_flags)
+}
+pub fn instr32_0F_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    ::gen::jit0f_32::jit(ctx.cpu.read_imm8(), ctx, instr_flags)
+}
+
+// Segment-override prefixes (26/2E/36/3E/64/65)
+pub fn instr_26_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    jit_handle_segment_prefix(ES, ctx, instr_flags)
+}
+pub fn instr_2E_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    jit_handle_segment_prefix(CS, ctx, instr_flags)
+}
+pub fn instr_36_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    jit_handle_segment_prefix(SS, ctx, instr_flags)
+}
+pub fn instr_3E_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    jit_handle_segment_prefix(DS, ctx, instr_flags)
+}
+
+pub fn instr_64_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    jit_handle_segment_prefix(FS, ctx, instr_flags)
+}
+pub fn instr_65_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    jit_handle_segment_prefix(GS, ctx, instr_flags)
+}
+
+// Operand-size (66) and address-size (67) override prefixes
+pub fn instr_66_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    ctx.cpu.prefixes |= PREFIX_66;
+    jit_handle_prefix(ctx, instr_flags)
+}
+pub fn instr_67_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    ctx.cpu.prefixes |= PREFIX_67;
+    jit_handle_prefix(ctx, instr_flags)
+}
+pub fn instr_F0_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    // lock: Ignore
+    jit_handle_prefix(ctx, instr_flags)
+}
+// repne/repnz (F2) and rep/repz (F3) prefixes
+pub fn instr_F2_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    ctx.cpu.prefixes |= PREFIX_F2;
+    jit_handle_prefix(ctx, instr_flags)
+}
+pub fn instr_F3_jit(ctx: &mut JitContext, instr_flags: &mut u32) {
+    ctx.cpu.prefixes |= PREFIX_F3;
+    jit_handle_prefix(ctx, instr_flags)
+}
+
+// Shared push/pop helpers: the callee name depends on the current stack-size
+// attribute (ss32 vs ss16), which is fixed at compile time for the block.
+
+// Push a 16-bit register
+fn push16_reg_jit(ctx: &mut JitContext, r: u32) {
+    let name = if ctx.cpu.ssize_32() {
+        "push16_ss32"
+    }
+    else {
+        "push16_ss16"
+    };
+    codegen::gen_fn1_reg16(ctx, name, r);
+}
+
+// Push a 32-bit register
+fn push32_reg_jit(ctx: &mut JitContext, r: u32) {
+    let name = if ctx.cpu.ssize_32() {
+        "push32_ss32"
+    }
+    else {
+        "push32_ss16"
+    };
+    codegen::gen_fn1_reg32(ctx, name, r);
+}
+
+// Push a 16-bit immediate
+fn push16_imm_jit(ctx: &mut JitContext, imm: u32) {
+    let name = if ctx.cpu.ssize_32() {
+        "push16_ss32"
+    }
+    else {
+        "push16_ss16"
+    };
+    codegen::gen_fn1_const(ctx, name, imm)
+}
+
+// Push a 32-bit immediate
+fn push32_imm_jit(ctx: &mut JitContext, imm: u32) {
+    let name = if ctx.cpu.ssize_32() {
+        "push32_ss32"
+    }
+    else {
+        "push32_ss16"
+    };
+    codegen::gen_fn1_const(ctx, name, imm)
+}
+
+// Push a 16-bit memory operand; the resolved address is left on the stack
+// for the _mem helper
+fn push16_mem_jit(ctx: &mut JitContext, modrm_byte: u8) {
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    let name = if ctx.cpu.ssize_32() {
+        "push16_ss32_mem"
+    }
+    else {
+        "push16_ss16_mem"
+    };
+    codegen::gen_modrm_fn0(ctx, name)
+}
+
+// Push a 32-bit memory operand
+fn push32_mem_jit(ctx: &mut JitContext, modrm_byte: u8) {
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    let name = if ctx.cpu.ssize_32() {
+        "push32_ss32_mem"
+    }
+    else {
+        "push32_ss16_mem"
+    };
+    codegen::gen_modrm_fn0(ctx, name)
+}
+
+// Pop into a 16-bit register
+fn pop16_reg_jit(ctx: &mut JitContext, reg: u32) {
+    if ctx.cpu.ssize_32() {
+        codegen::gen_set_reg16_fn0(ctx, "pop16_ss32", reg);
+    }
+    else {
+        codegen::gen_set_reg16_fn0(ctx, "pop16_ss16", reg);
+    }
+}
+
+// Pop into a 32-bit register
+fn pop32_reg_jit(ctx: &mut JitContext, reg: u32) {
+    if ctx.cpu.ssize_32() {
+        codegen::gen_set_reg32s_fn0(ctx, "pop32s_ss32", reg);
+    }
+    else {
+        codegen::gen_set_reg32s_fn0(ctx, "pop32s_ss16", reg);
+    }
+}
+
+// 50..57: push reg (16- and 32-bit operand-size variants)
+pub fn instr16_50_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, AX); }
+pub fn instr32_50_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, EAX); }
+pub fn instr16_51_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, CX); }
+pub fn instr32_51_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, ECX); }
+pub fn instr16_52_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, DX); }
+pub fn instr32_52_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, EDX); }
+pub fn instr16_53_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, BX); }
+pub fn instr32_53_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, EBX); }
+pub fn instr16_54_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, SP); }
+pub fn instr32_54_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, ESP); }
+pub fn instr16_55_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, BP); }
+pub fn instr32_55_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, EBP); }
+pub fn instr16_56_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, SI); }
+pub fn instr32_56_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, ESI); }
+pub fn instr16_57_jit(ctx: &mut JitContext) { push16_reg_jit(ctx, DI); }
+pub fn instr32_57_jit(ctx: &mut JitContext) { push32_reg_jit(ctx, EDI); }
+
+// 58..5F: pop reg
+pub fn instr16_58_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, AX); }
+pub fn instr32_58_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, EAX); }
+pub fn instr16_59_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, CX); }
+pub fn instr32_59_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, ECX); }
+pub fn instr16_5A_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, DX); }
+pub fn instr32_5A_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, EDX); }
+pub fn instr16_5B_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, BX); }
+pub fn instr32_5B_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, EBX); }
+// hole for pop esp
+pub fn instr16_5D_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, BP); }
+pub fn instr32_5D_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, EBP); }
+pub fn instr16_5E_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, SI); }
+pub fn instr32_5E_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, ESI); }
+pub fn instr16_5F_jit(ctx: &mut JitContext) { pop16_reg_jit(ctx, DI); }
+pub fn instr32_5F_jit(ctx: &mut JitContext) { pop32_reg_jit(ctx, EDI); }
+
+// 68/6A: push immediate (full-width and sign-extended byte forms)
+pub fn instr16_68_jit(ctx: &mut JitContext, imm16: u32) { push16_imm_jit(ctx, imm16) }
+pub fn instr32_68_jit(ctx: &mut JitContext, imm32: u32) { push32_imm_jit(ctx, imm32) }
+pub fn instr16_6A_jit(ctx: &mut JitContext, imm16: u32) { push16_imm_jit(ctx, imm16) }
+pub fn instr32_6A_jit(ctx: &mut JitContext, imm32: u32) { push32_imm_jit(ctx, imm32) }
+
+// Code for conditional jumps is generated automatically by the basic block codegen
+pub fn instr16_70_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_70_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_71_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_71_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_72_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_72_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_73_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_73_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_74_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_74_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_75_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_75_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_76_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_76_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_77_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_77_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_78_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_78_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_79_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_79_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_7A_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_7A_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_7B_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_7B_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_7C_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_7C_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_7D_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_7D_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_7E_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_7E_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_7F_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_7F_jit(_ctx: &mut JitContext, _imm: u32) {}
+
+// 89: mov r/m, reg (store)
+pub fn instr16_89_mem_jit(ctx: &mut JitContext, modrm_byte: u8, r: u32) {
+    // TODO: Inlining
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    codegen::gen_modrm_fn1(ctx, "instr16_89_mem", r);
+}
+pub fn instr16_89_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    codegen::gen_set_reg16_r(ctx, r1, r2);
+}
+pub fn instr32_89_mem_jit(ctx: &mut JitContext, modrm_byte: u8, r: u32) {
+    // Pseudo: safe_write32(modrm_resolve(modrm_byte), reg32s[r]);
+    let address_local = GEN_LOCAL_SCRATCH0;
+    let value_local = GEN_LOCAL_SCRATCH1;
+
+    // Resolve the effective address and stash it in a scratch local
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    wasm_util::set_local(&mut ctx.builder.instruction_body, address_local);
+
+    // Load reg32s[r] from linear memory and stash it as the value to write
+    wasm_util::push_i32(
+        &mut ctx.builder.instruction_body,
+        global_pointers::get_reg32_offset(r) as i32,
+    );
+    wasm_util::load_aligned_i32_from_stack(&mut ctx.builder.instruction_body, 0);
+    wasm_util::set_local(&mut ctx.builder.instruction_body, value_local);
+
+    codegen::gen_safe_write32(ctx, address_local, value_local);
+}
+pub fn instr32_89_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    codegen::gen_set_reg32_r(ctx, r1, r2);
+}
+
+// 8B: mov reg, r/m (load)
+pub fn instr16_8B_mem_jit(ctx: &mut JitContext, modrm_byte: u8, r: u32) {
+    // TODO: Inlining
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    codegen::gen_modrm_fn1(ctx, "instr16_8B_mem", r);
+}
+pub fn instr16_8B_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    codegen::gen_set_reg16_r(ctx, r2, r1);
+}
+pub fn instr32_8B_mem_jit(ctx: &mut JitContext, modrm_byte: u8, r: u32) {
+    // Pseudo: reg32s[r] = safe_read32s(modrm_resolve(modrm_byte));
+    wasm_util::push_i32(
+        &mut ctx.builder.instruction_body,
+        global_pointers::get_reg32_offset(r) as i32,
+    );
+
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    codegen::gen_safe_read32(ctx);
+
+    wasm_util::store_aligned_i32(&mut ctx.builder.instruction_body);
+}
+pub fn instr32_8B_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    codegen::gen_set_reg32_r(ctx, r2, r1);
+}
+
+// 8D: lea reg, m -- SEG_PREFIX_ZERO makes modrm_resolve compute the plain
+// offset (presumably without adding a segment base -- confirm against
+// gen_modrm_resolve)
+pub fn instr16_8D_mem_jit(ctx: &mut JitContext, modrm_byte: u8, reg: u32) {
+    let loc = global_pointers::get_reg16_offset(reg);
+    wasm_util::push_i32(&mut ctx.builder.instruction_body, loc as i32);
+    ctx.cpu.prefixes |= SEG_PREFIX_ZERO;
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    wasm_util::store_aligned_u16(&mut ctx.builder.instruction_body);
+}
+pub fn instr32_8D_mem_jit(ctx: &mut JitContext, modrm_byte: u8, reg: u32) {
+    let loc = global_pointers::get_reg32_offset(reg);
+    wasm_util::push_i32(&mut ctx.builder.instruction_body, loc as i32);
+    ctx.cpu.prefixes |= SEG_PREFIX_ZERO;
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    wasm_util::store_aligned_i32(&mut ctx.builder.instruction_body);
+}
+
+// lea with a register operand is invalid; delegate to the interpreter helper
+pub fn instr16_8D_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    codegen::gen_fn2_const(ctx, "instr16_8D_reg", r1, r2);
+}
+
+pub fn instr32_8D_reg_jit(ctx: &mut JitContext, r1: u32, r2: u32) {
+    codegen::gen_fn2_const(ctx, "instr32_8D_reg", r1, r2);
+}
+
+// 8F /0: pop r/m -- the _pre helper runs before address resolution
+pub fn instr16_8F_0_mem_jit(ctx: &mut JitContext, modrm_byte: u8) {
+    codegen::gen_fn0_const(ctx, "instr16_8F_0_mem_pre");
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    codegen::gen_modrm_fn0(ctx, "instr16_8F_0_mem");
+}
+pub fn instr16_8F_0_reg_jit(ctx: &mut JitContext, r: u32) {
+    codegen::gen_fn1_const(ctx, "instr16_8F_0_reg", r);
+}
+pub fn instr32_8F_0_mem_jit(ctx: &mut JitContext, modrm_byte: u8) {
+    codegen::gen_fn0_const(ctx, "instr32_8F_0_mem_pre");
+    codegen::gen_modrm_resolve(ctx, modrm_byte);
+    codegen::gen_modrm_fn0(ctx, "instr32_8F_0_mem");
+}
+pub fn instr32_8F_0_reg_jit(ctx: &mut JitContext, r: u32) {
+    codegen::gen_fn1_const(ctx, "instr32_8F_0_reg", r);
+}
+
+// E8: call rel; E9/EB: jmp rel -- delegated to interpreter helpers
+pub fn instr16_E8_jit(ctx: &mut JitContext, imm: u32) {
+    codegen::gen_fn1_const(ctx, "instr16_E8", imm);
+}
+pub fn instr32_E8_jit(ctx: &mut JitContext, imm: u32) {
+    codegen::gen_fn1_const(ctx, "instr32_E8", imm);
+}
+pub fn instr16_E9_jit(ctx: &mut JitContext, imm: u32) {
+    codegen::gen_fn1_const(ctx, "instr16_E9", imm);
+}
+pub fn instr32_E9_jit(ctx: &mut JitContext, imm: u32) {
+    codegen::gen_fn1_const(ctx, "instr32_E9", imm);
+}
+pub fn instr16_EB_jit(ctx: &mut JitContext, imm: u32) {
+    codegen::gen_fn1_const(ctx, "instr16_EB", imm);
+}
+pub fn instr32_EB_jit(ctx: &mut JitContext, imm: u32) {
+    codegen::gen_fn1_const(ctx, "instr32_EB", imm);
+}
+
+// FF /6: push r/m
+pub fn instr16_FF_6_mem_jit(ctx: &mut JitContext, modrm_byte: u8) {
+    push16_mem_jit(ctx, modrm_byte)
+}
+pub fn instr16_FF_6_reg_jit(ctx: &mut JitContext, r: u32) { push16_reg_jit(ctx, r) }
+pub fn instr32_FF_6_mem_jit(ctx: &mut JitContext, modrm_byte: u8) {
+    push32_mem_jit(ctx, modrm_byte)
+}
+pub fn instr32_FF_6_reg_jit(ctx: &mut JitContext, r: u32) { push32_reg_jit(ctx, r) }
+
+// Code for conditional jumps is generated automatically by the basic block codegen
+// (0F 80..8F are the long-displacement forms of the Jcc instructions above)
+pub fn instr16_0F80_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F81_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F82_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F83_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F84_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F85_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F86_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F87_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F88_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F89_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F8A_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F8B_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F8C_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F8D_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F8E_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr16_0F8F_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F80_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F81_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F82_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F83_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F84_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F85_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F86_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F87_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F88_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F89_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F8A_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F8B_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F8C_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F8D_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F8E_jit(_ctx: &mut JitContext, _imm: u32) {}
+pub fn instr32_0F8F_jit(_ctx: &mut JitContext, _imm: u32) {}

+ 52 - 0
src/rust/leb.rs

@@ -0,0 +1,52 @@
/// Encode `v` as a signed LEB128 (SLEB128) sequence, appending the bytes to
/// `buf`. At least one byte is always emitted.
///
/// See:
/// - https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
/// - http://llvm.org/doxygen/LEB128_8h_source.html#l00048
///
/// Fix over the C port: the explicit manual sign-extension after the shift
/// was removed, because `>>` on a signed `i32` is an arithmetic shift in
/// Rust and already preserves the sign bits.
pub fn write_leb_i32(buf: &mut Vec<u8>, mut v: i32) {
    let mut more = true;
    while more {
        let mut byte = (v & 0b0111_1111) as u8; // low 7 bits of the value
        v >>= 7; // arithmetic shift on i32: the sign is preserved
        // The encoding terminates when the remaining value consists only of
        // sign bits and the sign bit (bit 6) of the emitted byte matches it.
        let sign_bit = byte & (1 << 6);
        if (v == 0 && sign_bit == 0) || (v == -1 && sign_bit != 0) {
            more = false;
        }
        else {
            byte |= 0b1000_0000; // continuation bit: more bytes follow
        }
        buf.push(byte);
    }
}
+
/// Encode `v` as an unsigned LEB128 (ULEB128) sequence, appending the bytes
/// to `buf`. At least one byte is always emitted (zero encodes as 0x00).
pub fn write_leb_u32(buf: &mut Vec<u8>, mut v: u32) {
    let mut done = false;
    while !done {
        let low_bits = (v & 0x7F) as u8; // low 7 bits of the value
        v >>= 7; // shift them away from the value
        done = v == 0;
        // Set the continuation bit (MSB) on every byte except the last
        buf.push(if done { low_bits } else { low_bits | 0x80 });
    }
}
+
+/// Overwrite bytes `idx` and `idx + 1` of `vec` with a fixed 2-byte LEB128
+/// encoding of `x`. Used to patch a length placeholder after its final value
+/// is known; x must fit in 14 bits.
+pub fn write_fixed_leb16_at_idx(vec: &mut Vec<u8>, idx: usize, x: u16) {
+    dbg_assert!(x < (1 << 14)); // we have 14 bits of available space in 2 bytes for leb
+    vec[idx] = ((x & 0b1111111) | 0b10000000) as u8; // continuation bit set
+    vec[idx + 1] = (x >> 7) as u8;
+}
+
+/// Overwrite bytes `idx` .. `idx + 3` of `vec` with a fixed 4-byte LEB128
+/// encoding of `x`. Used to patch a length placeholder after its final value
+/// is known; x must fit in 28 bits.
+pub fn write_fixed_leb32_at_idx(vec: &mut Vec<u8>, idx: usize, x: u32) {
+    dbg_assert!(x < (1 << 28)); // we have 28 bits of available space in 4 bytes for leb
+    vec[idx] = (x & 0b1111111) as u8 | 0b10000000; // continuation bits on all but the last byte
+    vec[idx + 1] = (x >> 7 & 0b1111111) as u8 | 0b10000000;
+    vec[idx + 2] = (x >> 14 & 0b1111111) as u8 | 0b10000000;
+    vec[idx + 3] = (x >> 21 & 0b1111111) as u8;
+}

+ 20 - 3
src/rust/lib.rs

@@ -5,7 +5,24 @@ extern crate quickcheck;
 #[macro_use]
 mod dbg;
 
-#[macro_use]
-mod util;
+pub mod c_api;
 
-pub mod wasmgen;
+mod analysis;
+mod codegen;
+mod cpu;
+mod cpu_context;
+mod gen;
+mod global_pointers;
+mod jit;
+mod jit_instructions;
+mod leb;
+mod modrm;
+mod opstats;
+mod page;
+mod prefix;
+mod profiler;
+mod regs;
+mod state_flags;
+mod tlb;
+mod util;
+mod wasmgen;

+ 345 - 0
src/rust/modrm.rs

@@ -0,0 +1,345 @@
+use cpu_context::CpuContext;
+use jit::JitContext;
+use prefix::{PREFIX_MASK_SEGMENT, SEG_PREFIX_ZERO};
+use regs::{BP, BX, DI, SI};
+use regs::{DS, SS};
+use regs::{EAX, EBP, EBX, ECX, EDI, EDX, ESI, ESP};
+use wasmgen::wasm_util;
+
+/// Skip the displacement (and, in 32-bit mode, sib) bytes of the memory
+/// operand encoded by `modrm_byte`, without generating any code. The modrm
+/// byte itself has already been consumed by the caller.
+pub fn skip(ctx: &mut CpuContext, modrm_byte: u8) {
+    if ctx.asize_32() {
+        skip32(ctx, modrm_byte)
+    }
+    else {
+        skip16(ctx, modrm_byte)
+    }
+}
+
+fn skip16(ctx: &mut CpuContext, modrm_byte: u8) {
+    assert!(modrm_byte < 0xC0); // mod == 11 is a register operand, not handled here
+    let r = modrm_byte & 7;
+
+    if modrm_byte < 0x40 {
+        // mod == 00: no displacement, except rm == 6 which is a plain disp16
+        if r == 6 {
+            ctx.advance16()
+        }
+    }
+    else if modrm_byte < 0x80 {
+        // mod == 01: disp8
+        ctx.advance8()
+    }
+    else {
+        // mod == 10: disp16
+        ctx.advance16()
+    }
+}
+
+fn skip32(ctx: &mut CpuContext, modrm_byte: u8) {
+    assert!(modrm_byte < 0xC0); // mod == 11 is a register operand, not handled here
+    let r = modrm_byte & 7;
+
+    if r == 4 {
+        // rm == 4: a sib byte follows
+        let sib = ctx.read_imm8();
+        if modrm_byte < 0x40 {
+            // mod == 00 with base == 5: no base register, disp32 instead
+            if sib & 7 == 5 {
+                ctx.advance32()
+            }
+        }
+        else if modrm_byte < 0x80 {
+            // mod == 01: disp8
+            ctx.advance8()
+        }
+        else {
+            // mod == 10: disp32
+            ctx.advance32()
+        }
+    }
+    else if r == 5 && modrm_byte < 0x40 {
+        // mod == 00, rm == 5: plain disp32
+        ctx.advance32();
+    }
+    else {
+        if modrm_byte < 0x40 {
+            // Nothing
+        }
+        else if modrm_byte < 0x80 {
+            ctx.advance8()
+        }
+        else {
+            ctx.advance32()
+        }
+    }
+}
+
+/// Generate code that computes the effective address for `modrm_byte` and
+/// leaves it on the wasm stack, dispatching on the current address size.
+pub fn gen(ctx: &mut JitContext, modrm_byte: u8) {
+    if ctx.cpu.asize_32() {
+        gen32(ctx, modrm_byte)
+    }
+    else {
+        gen16(ctx, modrm_byte)
+    }
+}
+
+/// Size of the displacement that follows a 16-bit modrm memory operand
+enum Imm16 {
+    None,
+    Imm8,
+    Imm16,
+}
+
+/// Register part of a 16-bit effective address: zero, one or two 16-bit
+/// registers summed together
+enum Offset16 {
+    Zero,
+    One(u32),
+    Two(u32, u32),
+}
+
+/// Emit code computing a 16-bit effective address from the given register
+/// offset and immediate size, then add the segment base for `seg`.
+fn gen16_case(ctx: &mut JitContext, seg: u32, offset: Offset16, imm: Imm16) {
+    // Generates one of:
+    // - add_segment(reg)
+    // - add_segment(imm)
+    // - add_segment(reg1 + reg2 & 0xFFFF)
+    // - add_segment(reg1 + imm & 0xFFFF)
+    // - add_segment(reg1 + reg2 + imm & 0xFFFF)
+
+    // Read the displacement from the instruction stream at compile time
+    let immediate_value = match imm {
+        Imm16::None => 0,
+        Imm16::Imm8 => ctx.cpu.read_imm8s() as i32,
+        Imm16::Imm16 => ctx.cpu.read_imm16s() as i32,
+    };
+
+    match offset {
+        Offset16::Zero => {
+            // Pure displacement; masked to 16 bits up front
+            wasm_util::push_i32(&mut ctx.builder.instruction_body, immediate_value & 0xFFFF);
+        },
+        Offset16::One(r) => {
+            wasm_util::load_aligned_u16(
+                &mut ctx.builder.instruction_body,
+                ::global_pointers::get_reg16_offset(r),
+            );
+
+            if immediate_value != 0 {
+                // reg + imm can overflow 16 bits, so mask after the add
+                wasm_util::push_i32(&mut ctx.builder.instruction_body, immediate_value);
+                wasm_util::add_i32(&mut ctx.builder.instruction_body);
+
+                wasm_util::push_i32(&mut ctx.builder.instruction_body, 0xFFFF);
+                wasm_util::and_i32(&mut ctx.builder.instruction_body);
+            }
+        },
+        Offset16::Two(r1, r2) => {
+            wasm_util::load_aligned_u16(
+                &mut ctx.builder.instruction_body,
+                ::global_pointers::get_reg16_offset(r1),
+            );
+            wasm_util::load_aligned_u16(
+                &mut ctx.builder.instruction_body,
+                ::global_pointers::get_reg16_offset(r2),
+            );
+            wasm_util::add_i32(&mut ctx.builder.instruction_body);
+
+            if immediate_value != 0 {
+                wasm_util::push_i32(&mut ctx.builder.instruction_body, immediate_value);
+                wasm_util::add_i32(&mut ctx.builder.instruction_body);
+            }
+
+            // The sum of two registers (plus imm) always needs the 16-bit mask
+            wasm_util::push_i32(&mut ctx.builder.instruction_body, 0xFFFF);
+            wasm_util::and_i32(&mut ctx.builder.instruction_body);
+        },
+    }
+
+    jit_add_seg_offset(ctx, seg);
+}
+
+/// Dispatch on the 16-bit modrm byte with the reg field (bits 3..5) masked
+/// out; the octal constants mirror the layout of the modrm table (mod in the
+/// high digit, rm in the low digit).
+fn gen16(ctx: &mut JitContext, modrm_byte: u8) {
+    match modrm_byte & !0o070 {
+        0o000 => gen16_case(ctx, DS, Offset16::Two(BX, SI), Imm16::None),
+        0o001 => gen16_case(ctx, DS, Offset16::Two(BX, DI), Imm16::None),
+        0o002 => gen16_case(ctx, SS, Offset16::Two(BP, SI), Imm16::None),
+        0o003 => gen16_case(ctx, SS, Offset16::Two(BP, DI), Imm16::None),
+        0o004 => gen16_case(ctx, DS, Offset16::One(SI), Imm16::None),
+        0o005 => gen16_case(ctx, DS, Offset16::One(DI), Imm16::None),
+        0o006 => gen16_case(ctx, DS, Offset16::Zero, Imm16::Imm16),
+        0o007 => gen16_case(ctx, DS, Offset16::One(BX), Imm16::None),
+
+        0o100 => gen16_case(ctx, DS, Offset16::Two(BX, SI), Imm16::Imm8),
+        0o101 => gen16_case(ctx, DS, Offset16::Two(BX, DI), Imm16::Imm8),
+        0o102 => gen16_case(ctx, SS, Offset16::Two(BP, SI), Imm16::Imm8),
+        0o103 => gen16_case(ctx, SS, Offset16::Two(BP, DI), Imm16::Imm8),
+        0o104 => gen16_case(ctx, DS, Offset16::One(SI), Imm16::Imm8),
+        0o105 => gen16_case(ctx, DS, Offset16::One(DI), Imm16::Imm8),
+        0o106 => gen16_case(ctx, SS, Offset16::One(BP), Imm16::Imm8),
+        0o107 => gen16_case(ctx, DS, Offset16::One(BX), Imm16::Imm8),
+
+        0o200 => gen16_case(ctx, DS, Offset16::Two(BX, SI), Imm16::Imm16),
+        0o201 => gen16_case(ctx, DS, Offset16::Two(BX, DI), Imm16::Imm16),
+        0o202 => gen16_case(ctx, SS, Offset16::Two(BP, SI), Imm16::Imm16),
+        0o203 => gen16_case(ctx, SS, Offset16::Two(BP, DI), Imm16::Imm16),
+        0o204 => gen16_case(ctx, DS, Offset16::One(SI), Imm16::Imm16),
+        0o205 => gen16_case(ctx, DS, Offset16::One(DI), Imm16::Imm16),
+        0o206 => gen16_case(ctx, SS, Offset16::One(BP), Imm16::Imm16),
+        0o207 => gen16_case(ctx, DS, Offset16::One(BX), Imm16::Imm16),
+
+        // mod == 11 (register operand) must not reach this function
+        _ => assert!(false),
+    }
+}
+
+/// Size of the displacement that follows a 32-bit modrm memory operand
+#[derive(PartialEq)]
+enum Imm32 {
+    None,
+    Imm8,
+    Imm32,
+}
+
+/// Base of a 32-bit effective address: a single 32-bit register, a
+/// sib-encoded base, or none (pure displacement)
+enum Offset {
+    Reg(u32),
+    Sib,
+    None,
+}
+
+/// Emit code computing a 32-bit effective address for the given base and
+/// immediate size, then add the segment base for `seg`.
+///
+/// The displacement read is duplicated per arm because in the Sib case the
+/// sib byte must be consumed from the instruction stream before the
+/// displacement.
+fn gen32_case(ctx: &mut JitContext, seg: u32, offset: Offset, imm: Imm32) {
+    match offset {
+        Offset::Sib => {
+            // The sib code adds the segment itself (segment choice depends
+            // on the sib base), so no jit_add_seg_offset here
+            gen_sib(ctx, imm != Imm32::None);
+
+            let immediate_value = match imm {
+                Imm32::None => 0,
+                Imm32::Imm8 => ctx.cpu.read_imm8s() as i32,
+                Imm32::Imm32 => ctx.cpu.read_imm32() as i32,
+            };
+
+            if immediate_value != 0 {
+                wasm_util::push_i32(&mut ctx.builder.instruction_body, immediate_value);
+                wasm_util::add_i32(&mut ctx.builder.instruction_body);
+            }
+        },
+        Offset::Reg(r) => {
+            let immediate_value = match imm {
+                Imm32::None => 0,
+                Imm32::Imm8 => ctx.cpu.read_imm8s() as i32,
+                Imm32::Imm32 => ctx.cpu.read_imm32() as i32,
+            };
+            wasm_util::load_aligned_i32(
+                &mut ctx.builder.instruction_body,
+                ::global_pointers::get_reg32_offset(r),
+            );
+            if immediate_value != 0 {
+                wasm_util::push_i32(&mut ctx.builder.instruction_body, immediate_value);
+                wasm_util::add_i32(&mut ctx.builder.instruction_body);
+            }
+            jit_add_seg_offset(ctx, seg);
+        },
+        Offset::None => {
+            // Pure displacement
+            let immediate_value = match imm {
+                Imm32::None => 0,
+                Imm32::Imm8 => ctx.cpu.read_imm8s() as i32,
+                Imm32::Imm32 => ctx.cpu.read_imm32() as i32,
+            };
+            wasm_util::push_i32(&mut ctx.builder.instruction_body, immediate_value);
+            jit_add_seg_offset(ctx, seg);
+        },
+    }
+}
+
+/// Dispatch on the 32-bit modrm byte with the reg field (bits 3..5) masked
+/// out; octal constants mirror the modrm table layout (mod high digit, rm
+/// low digit).
+fn gen32(ctx: &mut JitContext, modrm_byte: u8) {
+    match modrm_byte & !0o070 {
+        0o000 => gen32_case(ctx, DS, Offset::Reg(EAX), Imm32::None),
+        0o001 => gen32_case(ctx, DS, Offset::Reg(ECX), Imm32::None),
+        0o002 => gen32_case(ctx, DS, Offset::Reg(EDX), Imm32::None),
+        0o003 => gen32_case(ctx, DS, Offset::Reg(EBX), Imm32::None),
+        0o004 => gen32_case(ctx, DS, Offset::Sib, Imm32::None),
+        0o005 => gen32_case(ctx, DS, Offset::None, Imm32::Imm32),
+        0o006 => gen32_case(ctx, DS, Offset::Reg(ESI), Imm32::None),
+        0o007 => gen32_case(ctx, DS, Offset::Reg(EDI), Imm32::None),
+
+        0o100 => gen32_case(ctx, DS, Offset::Reg(EAX), Imm32::Imm8),
+        0o101 => gen32_case(ctx, DS, Offset::Reg(ECX), Imm32::Imm8),
+        0o102 => gen32_case(ctx, DS, Offset::Reg(EDX), Imm32::Imm8),
+        0o103 => gen32_case(ctx, DS, Offset::Reg(EBX), Imm32::Imm8),
+        0o104 => gen32_case(ctx, DS, Offset::Sib, Imm32::Imm8),
+        0o105 => gen32_case(ctx, SS, Offset::Reg(EBP), Imm32::Imm8),
+        0o106 => gen32_case(ctx, DS, Offset::Reg(ESI), Imm32::Imm8),
+        0o107 => gen32_case(ctx, DS, Offset::Reg(EDI), Imm32::Imm8),
+
+        0o200 => gen32_case(ctx, DS, Offset::Reg(EAX), Imm32::Imm32),
+        0o201 => gen32_case(ctx, DS, Offset::Reg(ECX), Imm32::Imm32),
+        0o202 => gen32_case(ctx, DS, Offset::Reg(EDX), Imm32::Imm32),
+        0o203 => gen32_case(ctx, DS, Offset::Reg(EBX), Imm32::Imm32),
+        0o204 => gen32_case(ctx, DS, Offset::Sib, Imm32::Imm32),
+        0o205 => gen32_case(ctx, SS, Offset::Reg(EBP), Imm32::Imm32),
+        0o206 => gen32_case(ctx, DS, Offset::Reg(ESI), Imm32::Imm32),
+        0o207 => gen32_case(ctx, DS, Offset::Reg(EDI), Imm32::Imm32),
+
+        // mod == 11 (register operand) must not reach this function
+        _ => assert!(false),
+    }
+}
+
+// Emits code computing the address described by a SIB byte:
+// seg_base + base + index*scale. `mod_is_nonzero` selects the
+// EBP base (mod != 0) over an inline disp32 base (mod == 0) when the
+// base field is 5; any modrm displacement is read by the caller
+// afterwards (see gen32_case).
+fn gen_sib(ctx: &mut JitContext, mod_is_nonzero: bool) {
+    let sib_byte = ctx.cpu.read_imm8();
+    let r = sib_byte & 7; // base field
+    let m = sib_byte >> 3 & 7; // index field
+
+    let seg;
+
+    // Generates: get_seg_prefix(seg) + base
+    // Where base is a register or constant
+
+    if r == 4 {
+        // base = ESP; stack-relative accesses default to SS
+        seg = SS;
+        let base_addr = ::global_pointers::get_reg32_offset(ESP);
+        wasm_util::load_aligned_i32(&mut ctx.builder.instruction_body, base_addr);
+    }
+    else if r == 5 {
+        if mod_is_nonzero {
+            seg = SS;
+            let base_addr = ::global_pointers::get_reg32_offset(EBP);
+            wasm_util::load_aligned_i32(&mut ctx.builder.instruction_body, base_addr);
+        }
+        else {
+            // mod == 0: no register base, a disp32 follows the SIB byte
+            seg = DS;
+            let base = ctx.cpu.read_imm32();
+            wasm_util::push_i32(&mut ctx.builder.instruction_body, base as i32);
+        }
+    }
+    else {
+        seg = DS;
+        let base_addr = ::global_pointers::get_reg32_offset(r as u32);
+        wasm_util::load_aligned_i32(&mut ctx.builder.instruction_body, base_addr);
+    }
+
+    jit_add_seg_offset(ctx, seg);
+
+    // We now have to generate an offset value to add
+
+    if m == 4 {
+        // offset is 0, we don't need to add anything
+        return;
+    }
+
+    // Offset is reg32s[m] << s, where s is:
+
+    let s = sib_byte >> 6 & 3;
+
+    wasm_util::load_aligned_i32(
+        &mut ctx.builder.instruction_body,
+        ::global_pointers::get_reg32_offset(m as u32),
+    );
+    wasm_util::push_i32(&mut ctx.builder.instruction_body, s as i32);
+    wasm_util::shl_i32(&mut ctx.builder.instruction_body);
+
+    wasm_util::add_i32(&mut ctx.builder.instruction_body);
+}
+
+// True when the segment base can be assumed zero at codegen time:
+// only DS/SS qualify, and only under flat segmentation, so adding
+// the segment base may be skipped entirely.
+fn can_optimize_get_seg(ctx: &mut JitContext, segment: u32) -> bool {
+    (segment == DS || segment == SS) && ctx.cpu.has_flat_segmentation()
+}
+
+// Emits code adding the segment base to the address on the wasm
+// stack, honoring any segment override prefix. The low prefix bits
+// store segment index + 1 (0 = no override). Emission is skipped
+// when the base is known to be zero (flat DS/SS) or when the
+// zero-segment prefix is set.
+fn jit_add_seg_offset(ctx: &mut JitContext, default_segment: u32) {
+    let prefix = ctx.cpu.prefixes & PREFIX_MASK_SEGMENT;
+    let seg = if prefix != 0 {
+        prefix - 1
+    }
+    else {
+        default_segment
+    };
+
+    if can_optimize_get_seg(ctx, seg) || prefix == SEG_PREFIX_ZERO {
+        return;
+    }
+
+    // Runtime lookup: get_seg(seg) + address
+    wasm_util::push_i32(&mut ctx.builder.instruction_body, seg as i32);
+    wasm_util::call_fn(&mut ctx.builder.instruction_body, ::jit::FN_GET_SEG_IDX);
+    wasm_util::add_i32(&mut ctx.builder.instruction_body);
+}

+ 49 - 0
src/rust/opstats.rs

@@ -0,0 +1,49 @@
+use codegen::gen_increment_mem32;
+use global_pointers;
+use wasmgen::module_init::WasmBuilder;
+
+// Emits per-opcode counter increments for an instruction (debug
+// builds only; a no-op in release builds). `instruction` packs up to
+// four instruction bytes, lowest byte first; counting stops after
+// the first byte that is neither a prefix nor 0x0F, and 0F-escaped
+// opcodes go into a separate counter buffer.
+pub fn gen_opstats(builder: &mut WasmBuilder, mut instruction: u32) {
+    if !cfg!(debug_assertions) {
+        return;
+    }
+    let mut is_0f = false;
+
+    for _ in 0..4 {
+        let opcode = instruction & 0xFF;
+        instruction >>= 8;
+
+        // TODO:
+        // - If instruction depends on middle bits of modrm_byte, split
+        // - Split depending on memory or register variant
+        // - If the instruction uses 4 or more prefixes, only the prefixes will be counted
+
+        if is_0f {
+            // Second byte of a 0F-escaped opcode; counters are 4-byte slots
+            gen_increment_mem32(builder, global_pointers::OPSTATS_BUFFER_0F + 4 * opcode);
+            break;
+        }
+        else {
+            gen_increment_mem32(builder, global_pointers::OPSTATS_BUFFER + 4 * opcode);
+
+            if opcode == 0x0F {
+                is_0f = true;
+            }
+            else if opcode == 0x26
+                || opcode == 0x2E
+                || opcode == 0x36
+                || opcode == 0x3E
+                || opcode == 0x64
+                || opcode == 0x65
+                || opcode == 0x66
+                || opcode == 0x67
+                || opcode == 0xF0
+                || opcode == 0xF2
+                || opcode == 0xF3
+            {
+                // prefix
+            }
+            else {
+                break;
+            }
+        }
+    }
+}

+ 8 - 0
src/rust/page.rs

@@ -0,0 +1,8 @@
+// A 4 KiB page number (address >> 12). Kept abstract so page numbers
+// and raw addresses cannot be mixed up by accident.
+#[derive(Copy, Clone, Eq, Hash, PartialEq)]
+pub struct Page(u32);
+impl Page {
+    pub fn page_of(address: u32) -> Page { Page(address >> 12) }
+    // First address contained in this page
+    pub fn to_address(self) -> u32 { self.0 << 12 }
+
+    pub fn to_u32(self) -> u32 { self.0 }
+}

+ 14 - 0
src/rust/prefix.rs

@@ -0,0 +1,14 @@
+// Instruction prefix bitflags as stored in cpu.prefixes. The low
+// three bits (PREFIX_MASK_SEGMENT) hold the segment override as
+// segment index + 1, with 0 meaning no override; SEG_PREFIX_ZERO
+// marks a forced zero segment base.
+pub const PREFIX_REPZ: u32 = 0b01000;
+pub const PREFIX_REPNZ: u32 = 0b10000;
+
+pub const PREFIX_MASK_OPSIZE: u32 = 0b100000;
+pub const PREFIX_MASK_ADDRSIZE: u32 = 0b1000000;
+
+// Aliases named after the raw prefix bytes (0x66, 0x67, 0xF2, 0xF3)
+pub const PREFIX_66: u32 = PREFIX_MASK_OPSIZE;
+pub const PREFIX_67: u32 = PREFIX_MASK_ADDRSIZE;
+pub const PREFIX_F2: u32 = PREFIX_REPNZ;
+pub const PREFIX_F3: u32 = PREFIX_REPZ;
+
+pub const SEG_PREFIX_ZERO: u32 = 7;
+
+pub const PREFIX_MASK_SEGMENT: u32 = 0b111;

+ 63 - 0
src/rust/profiler.rs

@@ -0,0 +1,63 @@
+// Profiler counter ids. #[repr(C)] keeps the discriminants
+// C-compatible since they cross the FFI boundary (see
+// profiler_stat_increment below), hence the C-style names.
+#[repr(C)]
+#[allow(non_camel_case_types, dead_code)]
+pub enum stat {
+    S_COMPILE,
+    S_COMPILE_SUCCESS,
+    S_COMPILE_CUT_OFF_AT_END_OF_PAGE,
+    S_COMPILE_WITH_LOOP_SAFETY,
+    S_COMPILE_BASIC_BLOCK,
+    S_COMPILE_ENTRY_POINT,
+    S_CACHE_MISMATCH,
+
+    S_RUN_INTERPRETED,
+    S_RUN_INTERPRETED_PENDING,
+    S_RUN_INTERPRETED_NEAR_END_OF_PAGE,
+    S_RUN_INTERPRETED_DIFFERENT_STATE,
+    S_RUN_INTERPRETED_STEPS,
+
+    S_RUN_FROM_CACHE,
+    S_RUN_FROM_CACHE_STEPS,
+
+    S_TRIGGER_CPU_EXCEPTION,
+
+    S_SAFE_READ32_FAST,
+    S_SAFE_READ32_SLOW_PAGE_CROSSED,
+    S_SAFE_READ32_SLOW_NOT_VALID,
+    S_SAFE_READ32_SLOW_NOT_USER,
+    S_SAFE_READ32_SLOW_IN_MAPPED_RANGE,
+
+    S_SAFE_WRITE32_FAST,
+    S_SAFE_WRITE32_SLOW_PAGE_CROSSED,
+    S_SAFE_WRITE32_SLOW_NOT_VALID,
+    S_SAFE_WRITE32_SLOW_NOT_USER,
+    S_SAFE_WRITE32_SLOW_IN_MAPPED_RANGE,
+    S_SAFE_WRITE32_SLOW_READ_ONLY,
+    S_SAFE_WRITE32_SLOW_HAS_CODE,
+
+    S_DO_RUN,
+    S_DO_MANY_CYCLES,
+    S_CYCLE_INTERNAL,
+
+    S_INVALIDATE_PAGE,
+    S_INVALIDATE_CACHE_ENTRY,
+
+    S_NONFAULTING_OPTIMIZATION,
+
+    S_CLEAR_TLB,
+    S_FULL_CLEAR_TLB,
+    S_TLB_FULL,
+    S_TLB_GLOBAL_FULL,
+}
+
+// Debug builds forward counter bumps to an external implementation
+// of profiler_stat_increment; release builds compile the call away.
+#[cfg(debug_assertions)]
+mod unsafe_extern {
+    extern "C" {
+        pub fn profiler_stat_increment(stat: ::profiler::stat);
+    }
+}
+
+#[cfg(debug_assertions)]
+pub fn stat_increment(stat: stat) { unsafe { unsafe_extern::profiler_stat_increment(stat) } }
+
+// No-op variant keeps call sites unconditional in release builds
+#[cfg(not(debug_assertions))]
+pub fn stat_increment(_stat: stat) {}

+ 25 - 0
src/rust/regs.rs

@@ -0,0 +1,25 @@
+// Segment register indices
+pub const ES: u32 = 0;
+pub const CS: u32 = 1;
+pub const SS: u32 = 2;
+pub const DS: u32 = 3;
+pub const FS: u32 = 4;
+pub const GS: u32 = 5;
+
+// 32-bit general-purpose register indices
+pub const EAX: u32 = 0;
+pub const ECX: u32 = 1;
+pub const EDX: u32 = 2;
+pub const EBX: u32 = 3;
+pub const ESP: u32 = 4;
+pub const EBP: u32 = 5;
+pub const ESI: u32 = 6;
+pub const EDI: u32 = 7;
+
+// 16-bit register indices
+// Note: Different from C constants
+pub const AX: u32 = 0;
+pub const CX: u32 = 1;
+pub const DX: u32 = 2;
+pub const BX: u32 = 3;
+pub const SP: u32 = 4;
+pub const BP: u32 = 5;
+pub const SI: u32 = 6;
+pub const DI: u32 = 7;

+ 19 - 0
src/rust/state_flags.rs

@@ -0,0 +1,19 @@
+// Compact, abstract bitset of the CPU mode flags a piece of compiled
+// code depends on: 32-bit operand size, 32-bit stack size, CPL3 and
+// flat segmentation.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct CachedStateFlags(u32);
+
+impl CachedStateFlags {
+    const MASK_IS_32: u32 = 1 << 0;
+    const MASK_SS32: u32 = 1 << 1;
+    const MASK_CPL3: u32 = 1 << 2;
+    const MASK_FLAT_SEGS: u32 = 1 << 3;
+
+    pub const EMPTY: CachedStateFlags = CachedStateFlags(0);
+
+    // Raw conversions for storage / FFI
+    pub fn of_u32(f: u32) -> CachedStateFlags { CachedStateFlags(f) }
+    pub fn to_u32(&self) -> u32 { self.0 }
+
+    pub fn cpl3(&self) -> bool { self.0 & CachedStateFlags::MASK_CPL3 != 0 }
+    pub fn has_flat_segmentation(&self) -> bool { self.0 & CachedStateFlags::MASK_FLAT_SEGS != 0 }
+    pub fn is_32(&self) -> bool { self.0 & CachedStateFlags::MASK_IS_32 != 0 }
+    pub fn ssize_32(&self) -> bool { self.0 & CachedStateFlags::MASK_SS32 != 0 }
+}

+ 6 - 0
src/rust/tlb.rs

@@ -0,0 +1,6 @@
+// TLB entry flag bits; the commented-out bits exist in the entry
+// layout but are not referenced from this module.
+pub const TLB_VALID: u32 = (1 << 0);
+pub const TLB_READONLY: u32 = (1 << 1);
+pub const TLB_NO_USER: u32 = (1 << 2);
+//const TLB_IN_MAPPED_RANGE: u32 = (1 << 3);
+pub const TLB_GLOBAL: u32 = (1 << 4);
+//const TLB_HAS_CODE: u32 = (1 << 5);

+ 0 - 105
src/rust/util.rs

@@ -71,84 +71,6 @@ impl SafeToI32 for usize {
     }
 }
 
-pub fn write_leb_i32(buf: &mut Vec<u8>, mut v: i32) {
-    // Super complex stuff. See the following:
-    // https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
-    // http://llvm.org/doxygen/LEB128_8h_source.html#l00048
-
-    let mut more = true;
-    let negative = v < 0;
-    let size = 32;
-    while more {
-        let mut byte = (v & 0b1111111) as u8; // get last 7 bits
-        v >>= 7; // shift them away from the value
-        if negative {
-            v |= (!0 as i32) << (size - 7); // extend sign
-        }
-        let sign_bit = byte & (1 << 6);
-        if (v == 0 && sign_bit == 0) || (v == -1 && sign_bit != 0) {
-            more = false;
-        }
-        else {
-            byte |= 0b10000000; // turn on MSB
-        }
-        buf.push(byte);
-    }
-}
-
-pub fn write_leb_u32(buf: &mut Vec<u8>, mut v: u32) {
-    loop {
-        let mut byte = v as u8 & 0b01111111; // get last 7 bits
-        v >>= 7; // shift them away from the value
-        if v != 0 {
-            byte |= 0b10000000; // turn on MSB
-        }
-        buf.push(byte);
-        if v == 0 {
-            break;
-        }
-    }
-}
-
-pub fn write_fixed_leb16_at_idx(vec: &mut Vec<u8>, idx: usize, x: u16) {
-    dbg_assert!(x < (1 << 14)); // we have 14 bits of available space in 2 bytes for leb
-    vec[idx] = ((x & 0b1111111) | 0b10000000) as u8;
-    vec[idx + 1] = (x >> 7) as u8;
-}
-
-pub fn write_fixed_leb32_at_idx(vec: &mut Vec<u8>, idx: usize, x: u32) {
-    dbg_assert!(x < (1 << 28)); // we have 28 bits of available space in 4 bytes for leb
-    vec[idx] = (x & 0b1111111) as u8 | 0b10000000;
-    vec[idx + 1] = (x >> 7 & 0b1111111) as u8 | 0b10000000;
-    vec[idx + 2] = (x >> 14 & 0b1111111) as u8 | 0b10000000;
-    vec[idx + 3] = (x >> 21 & 0b1111111) as u8;
-}
-
-pub type PackedStr = (u64, u64, u64);
-
-#[allow(dead_code)]
-pub fn pack_str(s: &str) -> PackedStr {
-    assert!(s.len() <= 24);
-    let mut a: [u8; 24] = [0; 24];
-    for (i, ch) in s.char_indices() {
-        a[i] = ch as u8;
-    }
-
-    unsafe { ::std::mem::transmute(a) }
-}
-
-pub fn unpack_str(s: PackedStr) -> String {
-    let mut buf = String::with_capacity(24);
-    let bytes: [u8; 24] = unsafe { ::std::mem::transmute(s) };
-    for i in 0..24 {
-        if bytes[i] == 0 {
-            break;
-        }
-        buf.push(bytes[i] as char);
-    }
-    buf
-}
-
 #[allow(dead_code)]
 pub const DEBUG: bool = cfg!(debug_assertions);
 
@@ -169,30 +91,3 @@ pub fn _log_to_js_console<T: ToString>(s: T) {
         log_from_wasm(s.as_bytes().as_ptr(), len);
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use quickcheck::TestResult;
-    use util::*;
-
-    #[test]
-    fn packed_strs() {
-        let pstr = pack_str("foo");
-        assert_eq!("foo", unpack_str(pstr));
-
-        let pstr = pack_str("abcdefghijkl");
-        assert_eq!("abcdefghijkl", unpack_str(pstr));
-    }
-
-    quickcheck! {
-        fn prop(xs: Vec<u8>) -> TestResult {
-            if xs.len() > 24 || xs.contains(&0) { return TestResult::discard(); }
-            let xs = match String::from_utf8(xs) {
-                Ok(x) => x,
-                Err(_) => { return TestResult::discard(); },
-            };
-            TestResult::from_bool(xs == unpack_str(pack_str(&xs)))
-        }
-    }
-
-}

+ 0 - 95
src/rust/wasmgen/c_api.rs

@@ -1,95 +0,0 @@
-use util::PackedStr;
-
-use wasmgen::module_init::get_module;
-pub use wasmgen::module_init::wg_setup;
-
-#[no_mangle]
-pub fn wg_get_code_section() -> *mut Vec<u8> { &mut get_module().code_section }
-
-#[no_mangle]
-pub fn wg_get_instruction_body() -> *mut Vec<u8> { &mut get_module().instruction_body }
-
-#[no_mangle]
-pub fn wg_reset() {
-    let m = get_module();
-    m.reset();
-}
-
-#[no_mangle]
-pub fn wg_finish(no_of_locals_i32: u8) {
-    let m = get_module();
-    m.finish(no_of_locals_i32);
-}
-
-#[no_mangle]
-pub fn wg_get_fn_idx(fn_name_a: u64, fn_name_b: u64, fn_name_c: u64, type_idx: u8) -> u16 {
-    let fn_name: PackedStr = (fn_name_a, fn_name_b, fn_name_c);
-    let m = get_module();
-    m.get_fn_idx(fn_name, type_idx)
-}
-
-#[no_mangle]
-pub fn wg_get_op_ptr() -> *const u8 {
-    let m = get_module();
-    m.get_op_ptr()
-}
-
-#[no_mangle]
-pub fn wg_get_op_len() -> usize {
-    let m = get_module();
-    m.get_op_len()
-}
-
-#[no_mangle]
-pub fn wg_commit_instruction_body_to_cs() {
-    let m = get_module();
-    m.commit_instruction_body_cs();
-}
-
-#[cfg(test)]
-mod tests {
-    use std::fs::File;
-    use std::io::prelude::*;
-    use util::*;
-    use wasmgen::c_api::*;
-    use wasmgen::module_init::*;
-    use wasmgen::wasm_util::*;
-
-    #[test]
-    fn c_api_test() {
-        wg_setup();
-        let m = get_module();
-        let cs = &mut get_module().code_section;
-        let instruction_body = &mut get_module().instruction_body;
-
-        wg_call_fn(cs, m.get_fn_idx(pack_str("foo"), FN0_TYPE_INDEX));
-        wg_call_fn(cs, m.get_fn_idx(pack_str("bar"), FN0_TYPE_INDEX));
-
-        wg_finish(2);
-        wg_reset();
-
-        wg_push_i32(cs, 2);
-        wg_call_fn(
-            instruction_body,
-            m.get_fn_idx(pack_str("baz"), FN1_RET_TYPE_INDEX),
-        );
-        wg_call_fn(
-            instruction_body,
-            m.get_fn_idx(pack_str("foo"), FN1_TYPE_INDEX),
-        );
-
-        wg_commit_instruction_body_to_cs();
-
-        wg_finish(0);
-
-        let op_ptr = wg_get_op_ptr();
-        let op_len = wg_get_op_len();
-        dbg_log!("op_ptr: {:?}, op_len: {:?}", op_ptr, op_len);
-
-        let mut f =
-            File::create("build/wg_dummy_output.wasm").expect("creating wg_dummy_output.wasm");
-        f.write_all(&get_module().output)
-            .expect("write wg_dummy_output.wasm");
-    }
-
-}

+ 1 - 3
src/rust/wasmgen/mod.rs

@@ -1,5 +1,3 @@
-mod module_init;
+pub mod module_init;
 mod wasm_opcodes;
-
-pub mod c_api;
 pub mod wasm_util;

+ 46 - 33
src/rust/wasmgen/module_init.rs

@@ -1,10 +1,5 @@
-use std::mem;
-use std::ptr::NonNull;
-
-use util::{
-    unpack_str, write_fixed_leb16_at_idx, write_fixed_leb32_at_idx, write_leb_u32, PackedStr,
-    SafeToU16, SafeToU8,
-};
+use leb::{write_fixed_leb16_at_idx, write_fixed_leb32_at_idx, write_leb_u32};
+use util::{SafeToU16, SafeToU8};
 use wasmgen::wasm_opcodes as op;
 
 #[allow(dead_code)]
@@ -25,20 +20,6 @@ pub const FN2_RET_TYPE_INDEX: u8 = 6;
 
 pub const NR_FN_TYPE_INDEXES: u8 = 7;
 
-static mut MODULE_PTR: NonNull<WasmBuilder> =
-    unsafe { NonNull::new_unchecked(mem::align_of::<WasmBuilder>() as *mut _) };
-
-#[no_mangle]
-pub fn wg_setup() {
-    let wm = Box::new(WasmBuilder::new());
-    unsafe {
-        MODULE_PTR = NonNull::new(Box::into_raw(wm)).expect("assigning module ptr");
-    }
-    get_module().init();
-}
-
-pub fn get_module<'a>() -> &'a mut WasmBuilder { unsafe { MODULE_PTR.as_mut() } }
-
 pub struct WasmBuilder {
     pub output: Vec<u8>,
     pub code_section: Vec<u8>,
@@ -200,8 +181,7 @@ impl WasmBuilder {
     }
 
     /// Goes over the import block to find index of an import entry by function name
-    pub fn get_import_index(&self, fn_name: PackedStr) -> Option<u16> {
-        let fn_name = unpack_str(fn_name);
+    pub fn get_import_index(&self, fn_name: &str) -> Option<u16> {
         let mut offset = self.idx_import_entries;
         for i in 0..self.import_count {
             offset += 1; // skip length of module name
@@ -269,10 +249,9 @@ impl WasmBuilder {
         self.set_import_table_size(new_table_size);
     }
 
-    pub fn write_import_entry(&mut self, fn_name: PackedStr, type_index: u8) -> u16 {
+    pub fn write_import_entry(&mut self, fn_name: &str, type_index: u8) -> u16 {
         self.output.push(1); // length of module name
         self.output.push('e' as u8); // module name
-        let fn_name = unpack_str(fn_name);
         self.output.push(fn_name.len().safe_to_u8());
         self.output.extend(fn_name.as_bytes());
         self.output.push(op::EXT_FUNCTION);
@@ -314,7 +293,7 @@ impl WasmBuilder {
         write_fixed_leb16_at_idx(&mut self.output, next_op_idx, self.import_count - 1);
     }
 
-    pub fn get_fn_idx(&mut self, fn_name: PackedStr, type_index: u8) -> u16 {
+    pub fn get_fn_idx(&mut self, fn_name: &str, type_index: u8) -> u16 {
         match self.get_import_index(fn_name) {
             Some(idx) => idx,
             None => {
@@ -326,26 +305,60 @@ impl WasmBuilder {
 
     pub fn get_op_ptr(&self) -> *const u8 { self.output.as_ptr() }
 
-    pub fn get_op_len(&self) -> usize { self.output.len() }
+    pub fn get_op_len(&self) -> u32 { self.output.len() as u32 }
 
-    pub fn commit_instruction_body_cs(&mut self) {
+    pub fn commit_instruction_body_to_cs(&mut self) {
         self.code_section.append(&mut self.instruction_body);
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use util::pack_str;
+    use std::fs::File;
+    use std::io::prelude::*;
     use wasmgen::module_init::*;
+    use wasmgen::wasm_util::*;
 
     #[test]
     fn import_table_management() {
         let mut w = WasmBuilder::new();
         w.init();
-        assert_eq!(0, w.get_fn_idx(pack_str("foo"), FN0_TYPE_INDEX));
-        assert_eq!(1, w.get_fn_idx(pack_str("bar"), FN1_TYPE_INDEX));
-        assert_eq!(0, w.get_fn_idx(pack_str("foo"), FN0_TYPE_INDEX));
-        assert_eq!(2, w.get_fn_idx(pack_str("baz"), FN2_TYPE_INDEX));
+        assert_eq!(0, w.get_fn_idx("foo", FN0_TYPE_INDEX));
+        assert_eq!(1, w.get_fn_idx("bar", FN1_TYPE_INDEX));
+        assert_eq!(0, w.get_fn_idx("foo", FN0_TYPE_INDEX));
+        assert_eq!(2, w.get_fn_idx("baz", FN2_TYPE_INDEX));
     }
 
+    #[test]
+    fn builder_test() {
+        let mut m = WasmBuilder::new();
+        m.init();
+
+        let mut foo_index = m.get_fn_idx("foo", FN0_TYPE_INDEX);
+        call_fn(&mut m.code_section, foo_index);
+
+        let bar_index = m.get_fn_idx("bar", FN0_TYPE_INDEX);
+        call_fn(&mut m.code_section, bar_index);
+
+        m.finish(2);
+        m.reset();
+
+        push_i32(&mut m.code_section, 2);
+
+        let baz_index = m.get_fn_idx("baz", FN1_RET_TYPE_INDEX);
+        call_fn(&mut m.instruction_body, baz_index);
+        foo_index = m.get_fn_idx("foo", FN1_TYPE_INDEX);
+        call_fn(&mut m.instruction_body, foo_index);
+
+        m.commit_instruction_body_to_cs();
+
+        m.finish(0);
+
+        let op_ptr = m.get_op_ptr();
+        let op_len = m.get_op_len();
+        dbg_log!("op_ptr: {:?}, op_len: {:?}", op_ptr, op_len);
+
+        let mut f = File::create("build/dummy_output.wasm").expect("creating dummy_output.wasm");
+        f.write_all(&m.output).expect("write dummy_output.wasm");
+    }
 }

+ 58 - 113
src/rust/wasmgen/wasm_util.rs

@@ -1,20 +1,12 @@
-use util::{write_fixed_leb16_at_idx, write_leb_i32, write_leb_u32};
+use leb::{write_fixed_leb16_at_idx, write_leb_i32, write_leb_u32};
 use wasmgen::wasm_opcodes as op;
 
-#[no_mangle]
-pub fn wg_push_i32(buf: &mut Vec<u8>, v: i32) {
+pub fn push_i32(buf: &mut Vec<u8>, v: i32) {
     buf.push(op::OP_I32CONST);
     write_leb_i32(buf, v);
 }
 
-#[no_mangle]
-pub fn wg_push_u32(buf: &mut Vec<u8>, v: u32) {
-    buf.push(op::OP_I32CONST);
-    write_leb_u32(buf, v);
-}
-
-#[no_mangle]
-pub fn wg_load_aligned_u16(buf: &mut Vec<u8>, addr: u32) {
+pub fn load_aligned_u16(buf: &mut Vec<u8>, addr: u32) {
     // doesn't cause a failure in the generated code, but it will be much slower
     dbg_assert!((addr & 1) == 0);
 
@@ -25,43 +17,35 @@ pub fn wg_load_aligned_u16(buf: &mut Vec<u8>, addr: u32) {
     buf.push(0); // immediate offset
 }
 
-#[no_mangle]
-pub fn wg_load_aligned_i32(buf: &mut Vec<u8>, addr: u32) {
+pub fn load_aligned_i32(buf: &mut Vec<u8>, addr: u32) {
     // doesn't cause a failure in the generated code, but it will be much slower
     dbg_assert!((addr & 3) == 0);
 
-    wg_push_i32(buf, addr as i32);
-    wg_load_aligned_i32_from_stack(buf, 0);
+    push_i32(buf, addr as i32);
+    load_aligned_i32_from_stack(buf, 0);
 }
 
-#[no_mangle]
-pub fn wg_store_aligned_u16(buf: &mut Vec<u8>) {
+pub fn store_aligned_u16(buf: &mut Vec<u8>) {
     buf.push(op::OP_I32STORE16);
     buf.push(op::MEM_ALIGN16);
     buf.push(0); // immediate offset
 }
 
-#[no_mangle]
-pub fn wg_store_aligned_i32(buf: &mut Vec<u8>) {
+pub fn store_aligned_i32(buf: &mut Vec<u8>) {
     buf.push(op::OP_I32STORE);
     buf.push(op::MEM_ALIGN32);
     buf.push(0); // immediate offset
 }
 
-#[no_mangle]
-pub fn wg_add_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32ADD); }
+pub fn add_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32ADD); }
 
-#[no_mangle]
-pub fn wg_and_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32AND); }
+pub fn and_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32AND); }
 
-#[no_mangle]
-pub fn wg_or_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32OR); }
+pub fn or_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32OR); }
 
-#[no_mangle]
-pub fn wg_shl_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32SHL); }
+pub fn shl_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32SHL); }
 
-#[no_mangle]
-pub fn wg_call_fn(buf: &mut Vec<u8>, fn_idx: u16) {
+pub fn call_fn(buf: &mut Vec<u8>, fn_idx: u16) {
     buf.push(op::OP_CALL);
     let buf_len = buf.len();
     buf.push(0);
@@ -69,60 +53,37 @@ pub fn wg_call_fn(buf: &mut Vec<u8>, fn_idx: u16) {
     write_fixed_leb16_at_idx(buf, buf_len, fn_idx);
 }
 
-#[no_mangle]
-pub fn wg_call_fn_with_arg(buf: &mut Vec<u8>, fn_idx: u16, arg0: i32) {
-    wg_push_i32(buf, arg0);
-    wg_call_fn(buf, fn_idx);
-}
-
-#[no_mangle]
-pub fn wg_eq_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32EQ); }
+pub fn eq_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32EQ); }
 
-#[no_mangle]
-pub fn wg_ne_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32NE); }
+pub fn ne_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32NE); }
 
-#[no_mangle]
-pub fn wg_le_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32LES); }
+pub fn le_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32LES); }
 
-#[no_mangle]
-pub fn wg_lt_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32LTS); }
+pub fn lt_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32LTS); }
 
-#[no_mangle]
-pub fn wg_ge_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32GES); }
+pub fn ge_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32GES); }
 
-#[no_mangle]
-pub fn wg_gt_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32GTS); }
+pub fn gt_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32GTS); }
 
-#[no_mangle]
-pub fn wg_if_i32(buf: &mut Vec<u8>) {
+pub fn if_i32(buf: &mut Vec<u8>) {
     buf.push(op::OP_IF);
     buf.push(op::TYPE_I32);
 }
 
-#[no_mangle]
-pub fn wg_block_i32(buf: &mut Vec<u8>) {
+pub fn block_i32(buf: &mut Vec<u8>) {
     buf.push(op::OP_BLOCK);
     buf.push(op::TYPE_I32);
 }
 
-#[no_mangle]
-pub fn wg_tee_local(buf: &mut Vec<u8>, idx: i32) {
-    buf.push(op::OP_TEELOCAL);
-    write_leb_i32(buf, idx);
-}
-
-#[no_mangle]
-pub fn wg_xor_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32XOR); }
+pub fn xor_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32XOR); }
 
-#[no_mangle]
-pub fn wg_load_unaligned_i32_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
+pub fn load_unaligned_i32_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
     buf.push(op::OP_I32LOAD);
     buf.push(op::MEM_NO_ALIGN);
     write_leb_u32(buf, byte_offset);
 }
 
-#[no_mangle]
-pub fn wg_load_aligned_i32_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
+pub fn load_aligned_i32_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
     buf.push(op::OP_I32LOAD);
     buf.push(op::MEM_ALIGN32);
     write_leb_u32(buf, byte_offset);
@@ -130,101 +91,85 @@ pub fn wg_load_aligned_i32_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
 
 // XXX: Function naming should be consistent regarding both alignment and accepting an
 // offset. Leaving as-is for the Rust port to cleanup
-#[no_mangle]
-pub fn wg_store_unaligned_i32(buf: &mut Vec<u8>, byte_offset: u32) {
+pub fn store_unaligned_i32(buf: &mut Vec<u8>, byte_offset: u32) {
     buf.push(op::OP_I32STORE);
     buf.push(op::MEM_NO_ALIGN);
     write_leb_u32(buf, byte_offset);
 }
 
-#[no_mangle]
-pub fn wg_shr_u32(buf: &mut Vec<u8>) { buf.push(op::OP_I32SHRU); }
+pub fn shr_u32(buf: &mut Vec<u8>) { buf.push(op::OP_I32SHRU); }
 
-#[no_mangle]
-pub fn wg_shr_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32SHRS); }
+pub fn shr_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32SHRS); }
 
-#[no_mangle]
-pub fn wg_eqz_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32EQZ); }
+pub fn eqz_i32(buf: &mut Vec<u8>) { buf.push(op::OP_I32EQZ); }
 
-#[no_mangle]
-pub fn wg_if_void(buf: &mut Vec<u8>) {
+pub fn if_void(buf: &mut Vec<u8>) {
     buf.push(op::OP_IF);
     buf.push(op::TYPE_VOID_BLOCK);
 }
 
-#[no_mangle]
-pub fn wg_else(buf: &mut Vec<u8>) { buf.push(op::OP_ELSE); }
+pub fn else_(buf: &mut Vec<u8>) { buf.push(op::OP_ELSE); }
 
-#[no_mangle]
-pub fn wg_loop_void(buf: &mut Vec<u8>) {
+pub fn loop_void(buf: &mut Vec<u8>) {
     buf.push(op::OP_LOOP);
     buf.push(op::TYPE_VOID_BLOCK);
 }
 
-#[no_mangle]
-pub fn wg_block_void(buf: &mut Vec<u8>) {
+pub fn block_void(buf: &mut Vec<u8>) {
     buf.push(op::OP_BLOCK);
     buf.push(op::TYPE_VOID_BLOCK);
 }
 
-#[no_mangle]
-pub fn wg_block_end(buf: &mut Vec<u8>) { buf.push(op::OP_END); }
+pub fn block_end(buf: &mut Vec<u8>) { buf.push(op::OP_END); }
 
-#[no_mangle]
-pub fn wg_return(buf: &mut Vec<u8>) { buf.push(op::OP_RETURN); }
+pub fn return_(buf: &mut Vec<u8>) { buf.push(op::OP_RETURN); }
 
-#[no_mangle]
-pub fn wg_drop(buf: &mut Vec<u8>) { buf.push(op::OP_DROP); }
+pub fn drop(buf: &mut Vec<u8>) { buf.push(op::OP_DROP); }
 
 // Generate a br_table where an input of [i] will branch [i]th outer block,
 // where [i] is passed on the wasm stack
-#[no_mangle]
-pub fn wg_brtable_and_cases(buf: &mut Vec<u8>, cases_count: i32) {
-    assert!(cases_count >= 0);
-
+pub fn brtable_and_cases(buf: &mut Vec<u8>, cases_count: u32) {
     buf.push(op::OP_BRTABLE);
-    write_leb_u32(buf, cases_count as u32);
+    write_leb_u32(buf, cases_count);
 
     for i in 0..(cases_count + 1) {
-        write_leb_u32(buf, i as u32);
+        write_leb_u32(buf, i);
     }
 }
 
-#[no_mangle]
-pub fn wg_br(buf: &mut Vec<u8>, depth: i32) {
+pub fn br(buf: &mut Vec<u8>, depth: u32) {
     buf.push(op::OP_BR);
-    write_leb_i32(buf, depth);
+    write_leb_u32(buf, depth);
 }
 
-#[no_mangle]
-pub fn wg_get_local(buf: &mut Vec<u8>, idx: i32) {
+pub fn get_local(buf: &mut Vec<u8>, idx: u32) {
     buf.push(op::OP_GETLOCAL);
-    write_leb_i32(buf, idx);
+    write_leb_u32(buf, idx);
 }
 
-#[no_mangle]
-pub fn wg_set_local(buf: &mut Vec<u8>, idx: i32) {
+pub fn set_local(buf: &mut Vec<u8>, idx: u32) {
     buf.push(op::OP_SETLOCAL);
-    write_leb_i32(buf, idx);
+    write_leb_u32(buf, idx);
+}
+
+pub fn tee_local(buf: &mut Vec<u8>, idx: u32) {
+    buf.push(op::OP_TEELOCAL);
+    write_leb_u32(buf, idx);
 }
 
-#[no_mangle]
-pub fn wg_unreachable(buf: &mut Vec<u8>) { buf.push(op::OP_UNREACHABLE); }
+pub fn unreachable(buf: &mut Vec<u8>) { buf.push(op::OP_UNREACHABLE); }
 
-#[no_mangle]
-pub fn wg_increment_mem32(buf: &mut Vec<u8>, addr: i32) { wg_increment_variable(buf, addr, 1) }
+pub fn increment_mem32(buf: &mut Vec<u8>, addr: u32) { increment_variable(buf, addr, 1) }
 
-#[no_mangle]
-pub fn wg_increment_variable(buf: &mut Vec<u8>, addr: i32, n: i32) {
-    wg_push_i32(buf, addr);
-    wg_load_aligned_i32(buf, addr as u32);
-    wg_push_i32(buf, n);
-    wg_add_i32(buf);
-    wg_store_aligned_i32(buf);
+pub fn increment_variable(buf: &mut Vec<u8>, addr: u32, n: i32) {
+    push_i32(buf, addr as i32);
+    load_aligned_i32(buf, addr);
+    push_i32(buf, n);
+    add_i32(buf);
+    store_aligned_i32(buf);
 }
 
-#[no_mangle]
-pub fn wg_load_aligned_u16_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
+pub fn load_aligned_u16_from_stack(buf: &mut Vec<u8>, byte_offset: u32) {
     buf.push(op::OP_I32LOAD16U);
     buf.push(op::MEM_ALIGN16);
     write_leb_u32(buf, byte_offset);

+ 1 - 1
tests/expect/run.js

@@ -192,7 +192,7 @@ Hint: Use tests/expect/run.js --interactive to interactively accept changes.
             const START_ADDRESS = 0x1000;
 
             cpu.mem8.set(executable, START_ADDRESS);
-            cpu.jit_force_generate_unsafe(START_ADDRESS);
+            cpu.jit_force_generate(START_ADDRESS);
         });
 }
 

+ 4 - 4
tests/expect/tests/mov32-mem.wast

@@ -56,7 +56,7 @@
                 (i32.const 4095))
               (i32.const 4092)))
           (then
-            (i32.store offset=12582912 align=1
+            (i32.store offset=8388608 align=1
               (tee_local $l4
                 (i32.xor
                   (i32.and
@@ -104,7 +104,7 @@
                 (i32.const 4095))
               (i32.const 4092)))
           (then
-            (i32.store offset=12582912 align=1
+            (i32.store offset=8388608 align=1
               (tee_local $l4
                 (i32.xor
                   (i32.and
@@ -150,7 +150,7 @@
                   (i32.const 4095))
                 (i32.const 4092)))
             (then
-              (i32.load offset=12582912 align=1
+              (i32.load offset=8388608 align=1
                 (i32.xor
                   (i32.and
                     (get_local $l3)
@@ -193,7 +193,7 @@
                   (i32.const 4095))
                 (i32.const 4092)))
             (then
-              (i32.load offset=12582912 align=1
+              (i32.load offset=8388608 align=1
                 (i32.xor
                   (i32.and
                     (get_local $l3)

+ 1 - 1
tests/nasm/run.js

@@ -216,7 +216,7 @@ else {
 
         if(FORCE_JIT)
         {
-            cpu.jit_force_generate_unsafe(cpu.instruction_pointer[0]);
+            cpu.jit_force_generate(cpu.instruction_pointer[0]);
 
             cpu.test_hook_did_finalize_wasm = function()
             {

+ 1 - 1
tests/rust/verify-wasmgen-dummy-output.js

@@ -6,7 +6,7 @@ process.on("unhandledRejection", exn => { throw exn; });
 const fs = require("fs");
 const path = require("path");
 
-const DUMMY_MODULE_PATH = path.resolve(__dirname, "../../build/wg_dummy_output.wasm");
+const DUMMY_MODULE_PATH = path.resolve(__dirname, "../../build/dummy_output.wasm");
 const dummy_module = fs.readFileSync(DUMMY_MODULE_PATH);
 
 const wm = new WebAssembly.Module(dummy_module);

Some files were not shown because too many files changed in this diff