ソースを参照

New block analysis, generation of state machine with multiple basic blocks

This commit consists of three components:

1. A new generated x86-parser that analyses instructions. For now, it
   only detects the control flow of an instruction: Whether it is a
   (conditional) jump, a normal instruction or a basic block boundary
2. A new function, jit_find_basic_blocks, that finds and connects basic
   blocks using 1. It loosely finds all basic blocks making up a function,
   i.e. it doesn't follow call or return instructions (but it does follow
   all near jumps). Unlike our previous analysis, it also finds
   basic blocks in the strict sense that no basic block contains a jump
   into the middle of another basic block
3. A new code-generating function, jit_generate, that takes the output
   of 2 as input. It generates a state machine:
   - Each basic block becomes a case block in a switch-table
   - Each basic block ends with setting a state variable for the following basic block
   - The switch-table is inside a while(true) loop, which is terminated
     by return statements in basic blocks which are leaves

Additionally:
- Block linking has been removed as it is (mostly) obsoleted by these
  changes. It may later be reactivated for call instructions
- The code generator API has been extended to generate the code for the state machine
- The iterations of the state machine are limited in order to avoid
  infinite loops that can't be interrupted
Fabian 6 年 前
コミット
f8349af093

+ 11 - 1
Makefile

@@ -4,12 +4,15 @@ BROWSER=chromium
 NASM_TEST_DIR=./tests/nasm
 COVERAGE_DIR=./tests/coverage
 
-INSTRUCTION_TABLES=build/jit.c build/jit0f_16.c build/jit0f_32.c build/interpreter.c build/interpreter0f_16.c build/interpreter0f_32.c
+INSTRUCTION_TABLES=build/jit.c build/jit0f_16.c build/jit0f_32.c \
+		   build/interpreter.c build/interpreter0f_16.c build/interpreter0f_32.c \
+		   build/analyzer.c build/analyzer0f_16.c build/analyzer0f_32.c \
 
 # Only the dependencies common to both generate_{jit,interpreter}.js
 GEN_DEPENDENCIES=$(filter-out gen/generate_interpreter.js gen/generate_jit.js, $(wildcard gen/*.js))
 JIT_DEPENDENCIES=$(GEN_DEPENDENCIES) gen/generate_jit.js
 INTERPRETER_DEPENDENCIES=$(GEN_DEPENDENCIES) gen/generate_interpreter.js
+ANALYZER_DEPENDENCIES=$(GEN_DEPENDENCIES) gen/generate_analyzer.js
 
 # Enable manually and recompile v86-debug.wasm for coverage-enabled tests
 ifeq ($(ENABLE_COV), 1)
@@ -181,6 +184,13 @@ build/interpreter0f_16.c: $(INTERPRETER_DEPENDENCIES)
 build/interpreter0f_32.c: $(INTERPRETER_DEPENDENCIES)
 	./gen/generate_interpreter.js --output-dir build/ --table interpreter0f_32
 
+build/analyzer.c: $(ANALYZER_DEPENDENCIES)
+	./gen/generate_analyzer.js --output-dir build/ --table analyzer
+build/analyzer0f_16.c: $(ANALYZER_DEPENDENCIES)
+	./gen/generate_analyzer.js --output-dir build/ --table analyzer0f_16
+build/analyzer0f_32.c: $(ANALYZER_DEPENDENCIES)
+	./gen/generate_analyzer.js --output-dir build/ --table analyzer0f_32
+
 .PHONY: phony
 build/JIT_ALWAYS: phony
 	@if [[ `cat build/JIT_ALWAYS 2>&1` != '$(JIT_ALWAYS)' ]]; then \

+ 574 - 0
gen/generate_analyzer.js

@@ -0,0 +1,574 @@
+#!/usr/bin/env node
+"use strict";
+
+const fs = require("fs");
+const path = require("path");
+const encodings = require("./x86_table");
+const c_ast = require("./c_ast");
+const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
+
+const APPEND_NONFAULTING_FLAG = "analysis.flags |= JIT_INSTR_NONFAULTING_FLAG;";
+
+const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
+
+mkdirpSync(OUT_DIR);
+
+const table_arg = get_switch_value("--table");
+const gen_all = get_switch_exist("--all");
+const to_generate = {
+    analyzer: gen_all || table_arg === "analyzer",
+    analyzer0f_16: gen_all || table_arg === "analyzer0f_16",
+    analyzer0f_32: gen_all || table_arg === "analyzer0f_32",
+};
+
+console.assert(
+    Object.keys(to_generate).some(k => to_generate[k]),
+    "Pass --table [analyzer|analyzer0f_16|analyzer0f_32] or --all to pick which tables to generate"
+);
+
+gen_table();
+
+function gen_read_imm_call(op, size_variant)
+{
+    let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
+
+    if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
+    {
+        if(op.imm8)
+        {
+            return "read_imm8()";
+        }
+        else if(op.imm8s)
+        {
+            return "read_imm8s()";
+        }
+        else
+        {
+            if(op.immaddr)
+            {
+                // immaddr: depends on address size
+                return "read_moffs()";
+            }
+            else
+            {
+                console.assert(op.imm1632 || op.imm16 || op.imm32);
+
+                if(op.imm1632 && size === 16 || op.imm16)
+                {
+                    return "read_imm16()";
+                }
+                else
+                {
+                    console.assert(op.imm1632 && size === 32 || op.imm32);
+                    return "read_imm32s()";
+                }
+            }
+        }
+    }
+    else
+    {
+        return undefined;
+    }
+}
+
+function gen_call(name, args)
+{
+    args = args || [];
+    return `${name}(${args.join(", ")});`;
+}
+
+function gen_codegen_call(args)
+{
+    return args.map(arg => arg + ";");
+}
+
+function gen_codegen_call_modrm(args)
+{
+    args = args.map(arg => arg + ";");
+    return [].concat(gen_call("modrm_skip", ["modrm_byte"]), args);
+}
+
+function gen_modrm_mem_reg_split(mem_args, reg_args, postfixes={})
+{
+    const { mem_postfix=[], reg_postfix=[] } = postfixes;
+
+    return {
+        type: "if-else",
+        if_blocks: [{
+            condition: "modrm_byte < 0xC0",
+            body: []
+                .concat(gen_codegen_call_modrm(mem_args))
+                .concat(mem_postfix),
+        }],
+        else_block: {
+            body: gen_codegen_call(reg_args).concat(reg_postfix),
+        },
+    };
+}
+
+/*
+ * Current naming scheme:
+ * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
+ */
+
+function make_instruction_name(encoding, size, prefix_variant)
+{
+    const suffix = encoding.os ? String(size) : "";
+    const opcode_hex = hex(encoding.opcode & 0xFF, 2);
+    const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
+    const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
+    const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
+
+    return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
+}
+
+function get_nonfaulting_mem_reg_postfix(encoding)
+{
+    const lea_special_case = encoding.opcode === 0x8D;
+    const mem_postfix = (encoding.nonfaulting && lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
+    const reg_postfix = (encoding.nonfaulting && !lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
+
+    return {
+        mem_postfix,
+        reg_postfix,
+    };
+}
+
+function gen_instruction_body(encodings, size)
+{
+    const encoding = encodings[0];
+
+    let has_66 = false;
+    let has_F2 = false;
+    let has_F3 = false;
+
+    for(let e of encodings)
+    {
+        if((e.opcode >>> 16) === 0x66) has_66 = true;
+        if((e.opcode >>> 16) === 0xF2) has_F2 = true;
+        if((e.opcode >>> 16) === 0xF3) has_F3 = true;
+    }
+
+    console.assert(
+        !encodings.some(e => e.nonfaulting && e.block_boundary),
+        "Unsupported: instruction cannot be both a jump and nonfaulting. Opcode: 0x" + hex(encoding.opcode)
+    );
+
+    if(has_66 || has_F2 || has_F3)
+    {
+        console.assert((encoding.opcode & 0xFF00) === 0x0F00);
+    }
+
+    const instruction_postfix = encoding.block_boundary ? ["analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
+
+    if(encoding.fixed_g !== undefined)
+    {
+        // instruction with modrm byte where the middle 3 bits encode the instruction
+
+        // group by opcode without prefix plus middle bits of modrm byte
+        let cases = encodings.reduce((cases_by_opcode, case_) => {
+            console.assert(typeof case_.fixed_g === "number");
+            cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
+            return cases_by_opcode;
+        }, Object.create(null));
+        cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
+
+        return [
+            "int32_t modrm_byte = read_imm8();",
+            {
+                type: "switch",
+                condition: "modrm_byte >> 3 & 7",
+                cases: cases.map(case_ => {
+                    const fixed_g = case_.fixed_g;
+                    const instruction_postfix = case_.block_boundary ? ["analysis.flags |=  JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
+
+                    const mem_args = [];
+                    const reg_args = [];
+
+                    const imm_read = gen_read_imm_call(case_, size);
+
+                    if(imm_read)
+                    {
+                        mem_args.push(imm_read);
+                        reg_args.push(imm_read);
+                    }
+
+                    if(has_66 || has_F2 || has_F3)
+                    {
+                        const if_blocks = [];
+
+                        if(has_66) {
+                            const name = make_instruction_name(case_, size, 0x66);
+                            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
+                        }
+                        if(has_F2) {
+                            const name = make_instruction_name(case_, size, 0xF2);
+                            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
+                        }
+                        if(has_F3) {
+                            const name = make_instruction_name(case_, size, 0xF3);
+                            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
+                        }
+
+                        const else_block = {
+                            body: [
+                                gen_modrm_mem_reg_split(
+                                    mem_args,
+                                    reg_args,
+                                    {}
+                                )
+                            ],
+                        };
+
+                        return {
+                            conditions: [fixed_g],
+                            body: [
+                                "int32_t prefixes_ = *prefixes;",
+                                {
+                                    type: "if-else",
+                                    if_blocks,
+                                    else_block,
+                                },
+                            ].concat(instruction_postfix),
+                        };
+                    }
+                    else
+                    {
+                        const body = [
+                            gen_modrm_mem_reg_split(
+                                mem_args,
+                                reg_args,
+                                get_nonfaulting_mem_reg_postfix(case_)
+                            )
+                        ].concat(instruction_postfix);
+
+                        return {
+                            conditions: [fixed_g],
+                            body,
+                        };
+                    }
+                }),
+
+                default_case: {
+                    body: [
+                        "assert(false);",
+                        "analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;",
+                    ],
+                }
+            },
+        ].concat(instruction_postfix);
+    }
+    else if(has_66 || has_F2 || has_F3)
+    {
+        // instruction without modrm byte but with prefix
+
+        console.assert(encoding.e);
+        console.assert(!encoding.ignore_mod);
+
+        const imm_read = gen_read_imm_call(encoding, size);
+
+        const mem_args = [];
+        const reg_args = [];
+
+        if(imm_read)
+        {
+            mem_args.push(imm_read);
+            reg_args.push(imm_read);
+        }
+
+        const if_blocks = [];
+
+        if(has_66) {
+            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
+        }
+        if(has_F2) {
+            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
+        }
+        if(has_F3) {
+            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
+        }
+
+        const else_block = {
+            body: [
+                gen_modrm_mem_reg_split(
+                    mem_args,
+                    reg_args,
+                    {}
+                )
+            ],
+        };
+
+        return [
+            "int32_t modrm_byte = read_imm8();",
+            "int32_t prefixes_ = *prefixes;",
+            {
+                type: "if-else",
+                if_blocks,
+                else_block,
+            }
+        ].concat(instruction_postfix);
+    }
+    else if(encoding.fixed_g === undefined && encoding.e)
+    {
+        // instruction with modrm byte where the middle 3 bits encode a register
+
+        console.assert(encodings.length === 1);
+
+        const imm_read = gen_read_imm_call(encoding, size);
+
+        if(encoding.ignore_mod)
+        {
+            console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
+
+            // Has modrm byte, but the 2 mod bits are ignored and both
+            // operands are always registers (0f20-0f24)
+
+            if(encoding.nonfaulting)
+            {
+                instruction_postfix.push(APPEND_NONFAULTING_FLAG);
+            }
+
+            return ["int32_t modrm_byte = read_imm8();"]
+                .concat(gen_codegen_call([]))
+                .concat(instruction_postfix);
+        }
+        else
+        {
+            const mem_args = [];
+            const reg_args = [];
+
+            if(imm_read)
+            {
+                mem_args.push(imm_read);
+                reg_args.push(imm_read);
+            }
+
+            return [
+                "int32_t modrm_byte = read_imm8();",
+                gen_modrm_mem_reg_split(
+                    mem_args,
+                    reg_args,
+                    get_nonfaulting_mem_reg_postfix(encoding)
+                ),
+            ].concat(instruction_postfix);
+        }
+    }
+    else if(encoding.prefix)
+    {
+        console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");
+
+        const instruction_name = make_instruction_name(encoding, size) + "_analyze";
+        const imm_read = gen_read_imm_call(encoding, size);
+        const args = [];
+
+        if(imm_read)
+        {
+            args.push(imm_read);
+        }
+
+        const call_prefix = encoding.prefix ? "return " : "";
+        // Prefix calls can add to the return flags
+        return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix);
+    }
+    else
+    {
+        // instruction without modrm byte or prefix
+
+        const imm_read = gen_read_imm_call(encoding, size);
+
+        const args = [];
+
+        if(imm_read)
+        {
+            if(encoding.jump_offset_imm)
+            {
+                args.push("int32_t jump_offset = " + imm_read + ";");
+                args.push(`
+                    analysis.jump_target = is_osize_32() ?
+                        *instruction_pointer + jump_offset :
+                        get_seg_cs() + ((*instruction_pointer - get_seg_cs() + jump_offset) & 0xFFFF);`);
+            }
+
+            else
+            {
+                args.push(imm_read + ";");
+            }
+        }
+
+        if(encoding.extra_imm16)
+        {
+            console.assert(imm_read);
+            args.push(gen_call("read_imm16"));
+        }
+        else if(encoding.extra_imm8)
+        {
+            console.assert(imm_read);
+            args.push(gen_call("read_imm8"));
+        }
+
+        if(encoding.nonfaulting)
+        {
+            instruction_postfix.push(APPEND_NONFAULTING_FLAG);
+        }
+
+        if(encoding.conditional_jump)
+        {
+            instruction_postfix.push("analysis.condition_index = " + (encoding.opcode & 0xF) + ";");
+        }
+
+        return args.concat(instruction_postfix);
+    }
+}
+
+function gen_table()
+{
+    let by_opcode = Object.create(null);
+    let by_opcode0f = Object.create(null);
+
+    for(let o of encodings)
+    {
+        let opcode = o.opcode;
+
+        if(opcode >= 0x100)
+        {
+            if((opcode & 0xFF00) === 0x0F00)
+            {
+                opcode &= 0xFF;
+                by_opcode0f[opcode] = by_opcode0f[opcode] || [];
+                by_opcode0f[opcode].push(o);
+            }
+        }
+        else
+        {
+            by_opcode[opcode] = by_opcode[opcode] || [];
+            by_opcode[opcode].push(o);
+        }
+    }
+
+    let cases = [];
+    for(let opcode = 0; opcode < 0x100; opcode++)
+    {
+        let encoding = by_opcode[opcode];
+        console.assert(encoding && encoding.length);
+
+        let opcode_hex = hex(opcode, 2);
+
+        if(encoding[0].os)
+        {
+            cases.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 16),
+            });
+            cases.push({
+                conditions: [`0x${opcode_hex}|0x100`],
+                body: gen_instruction_body(encoding, 32),
+            });
+        }
+        else
+        {
+            cases.push({
+                conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
+                body: gen_instruction_body(encoding, undefined),
+            });
+        }
+    }
+    const table = {
+        type: "switch",
+        condition: "opcode",
+        cases,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+
+    if(to_generate.analyzer)
+    {
+        finalize_table(
+            OUT_DIR,
+            "analyzer",
+            c_ast.print_syntax_tree([table]).join("\n") + "\n"
+        );
+    }
+
+    const cases0f_16 = [];
+    const cases0f_32 = [];
+    for(let opcode = 0; opcode < 0x100; opcode++)
+    {
+        let encoding = by_opcode0f[opcode];
+
+        if(!encoding)
+        {
+            encoding = [
+                {
+                    opcode: 0x0F00 | opcode,
+                },
+            ];
+        }
+
+        console.assert(encoding && encoding.length);
+
+        let opcode_hex = hex(opcode, 2);
+
+        if(encoding[0].os)
+        {
+            cases0f_16.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 16),
+            });
+            cases0f_32.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 32),
+            });
+        }
+        else
+        {
+            let block = {
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, undefined),
+            };
+            cases0f_16.push(block);
+            cases0f_32.push(block);
+        }
+    }
+
+    const table0f_16 = {
+        type: "switch",
+        condition: "opcode",
+        cases: cases0f_16,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    const table0f_32 = {
+        type: "switch",
+        condition: "opcode",
+        cases: cases0f_32,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+
+    if(to_generate.analyzer0f_16)
+    {
+        finalize_table(
+            OUT_DIR,
+            "analyzer0f_16",
+            c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n"
+        );
+    }
+
+    if(to_generate.analyzer0f_32)
+    {
+        finalize_table(
+            OUT_DIR,
+            "analyzer0f_32",
+            c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n"
+        );
+    }
+}

+ 40 - 35
gen/x86_table.js

@@ -140,8 +140,8 @@ const encodings = [
     { opcode: 0xAE, block_boundary: 1, is_string: 1, },
     { opcode: 0xAF, block_boundary: 1, is_string: 1, os: 1, },
 
-    { opcode: 0xC2, block_boundary: 1, os: 1, imm16: 1, skip: 1, }, // ret
-    { opcode: 0xC3, block_boundary: 1, os: 1, skip: 1, },
+    { opcode: 0xC2, block_boundary: 1, no_next_instruction: 1, os: 1, imm16: 1, skip: 1, }, // ret
+    { opcode: 0xC3, block_boundary: 1, no_next_instruction: 1, os: 1, skip: 1, },
 
     { opcode: 0xC4, os: 1, e: 1, skip: 1, },
     { opcode: 0xC5, block_boundary: 1, os: 1, e: 1, skip: 1, },
@@ -151,22 +151,25 @@ const encodings = [
 
     { opcode: 0xC8, os: 1, imm16: 1, extra_imm8: 1, }, // enter
     { opcode: 0xC9, os: 1, skip: 1, }, // leave: requires valid ebp
-    { opcode: 0xCA, block_boundary: 1, os: 1, imm16: 1, skip: 1, }, // retf
-    { opcode: 0xCB, block_boundary: 1, os: 1, skip: 1, },
-    { opcode: 0xCC, block_boundary: 1, skip: 1, },
+    { opcode: 0xCA, block_boundary: 1, no_next_instruction: 1, os: 1, imm16: 1, skip: 1, }, // retf
+    { opcode: 0xCB, block_boundary: 1, no_next_instruction: 1, os: 1, skip: 1, },
+    { opcode: 0xCC, block_boundary: 1, skip: 1, }, // int
     { opcode: 0xCD, block_boundary: 1, skip: 1, imm8: 1, },
     { opcode: 0xCE, block_boundary: 1, skip: 1, },
-    { opcode: 0xCF, block_boundary: 1, os: 1, skip: 1, },
+    { opcode: 0xCF, block_boundary: 1, no_next_instruction: 1, os: 1, skip: 1, }, // iret
 
     { opcode: 0xD4, imm8: 1, }, // aam, may trigger #de
     { opcode: 0xD5, nonfaulting: 1, imm8: 1, mask_flags: of | cf | af, },
     { opcode: 0xD6, nonfaulting: 1, },
     { opcode: 0xD7, skip: 1, },
 
-    { opcode: 0xE0, imm8s: 1, skip: 1, block_boundary: 1, },
-    { opcode: 0xE1, imm8s: 1, skip: 1, block_boundary: 1, },
-    { opcode: 0xE2, imm8s: 1, skip: 1, block_boundary: 1, },
-    { opcode: 0xE3, imm8s: 1, skip: 1, block_boundary: 1, },
+    // loop, jcxz, etc.
+    // Conditional jumps, but condition code not supported by code generator
+    // (these are never generated by modern compilers)
+    { opcode: 0xE0, imm8s: 1, skip: 1, block_boundary: 1, /* jump_offset_imm: 1, conditional_jump: 1, */ },
+    { opcode: 0xE1, imm8s: 1, skip: 1, block_boundary: 1, /* jump_offset_imm: 1, conditional_jump: 1, */ },
+    { opcode: 0xE2, imm8s: 1, skip: 1, block_boundary: 1, /* jump_offset_imm: 1, conditional_jump: 1, */ },
+    { opcode: 0xE3, imm8s: 1, skip: 1, block_boundary: 1, /* jump_offset_imm: 1, conditional_jump: 1, */ },
 
     // port functions aren't jumps, but they may modify eip due to how they are implemented
     { opcode: 0xE4, block_boundary: 1, imm8: 1, skip: 1, }, // in
@@ -174,10 +177,12 @@ const encodings = [
     { opcode: 0xE6, block_boundary: 1, imm8: 1, skip: 1, }, // out
     { opcode: 0xE7, block_boundary: 1, os: 1, imm8: 1, skip: 1, },
 
-    { opcode: 0xE8, block_boundary: 1, os: 1, imm1632: 1, custom: 1, skip: 1, },
-    { opcode: 0xE9, block_boundary: 1, os: 1, imm1632: 1, custom: 1, skip: 1, },
-    { opcode: 0xEA, block_boundary: 1, os: 1, imm1632: 1, extra_imm16: 1, skip: 1, }, // jmpf
-    { opcode: 0xEB, block_boundary: 1, imm8s: 1, custom: 1, skip: 1, },
+    // E8 call: Has immediate jump offset, but we don't really want to follow
+    //          into other functions while generating code
+    { opcode: 0xE8, block_boundary: 1, /* jump_offset_imm: 1, */ os: 1, imm1632: 1, custom: 1, skip: 1, },
+    { opcode: 0xE9, block_boundary: 1, jump_offset_imm: 1, no_next_instruction: 1, os: 1, imm1632: 1, custom: 1, skip: 1, },
+    { opcode: 0xEA, block_boundary: 1, no_next_instruction: 1, os: 1, imm1632: 1, extra_imm16: 1, skip: 1, }, // jmpf
+    { opcode: 0xEB, block_boundary: 1, jump_offset_imm: 1, no_next_instruction: 1, imm8s: 1, custom: 1, skip: 1, },
 
     { opcode: 0xEC, block_boundary: 1, skip: 1, },
     { opcode: 0xED, block_boundary: 1, os: 1, skip: 1, },
@@ -223,8 +228,8 @@ const encodings = [
     { opcode: 0xFF, os: 1, e: 1, fixed_g: 1, nonfaulting: 1, },
     { opcode: 0xFF, os: 1, e: 1, fixed_g: 2, block_boundary: 1, skip: 1, },
     { opcode: 0xFF, os: 1, e: 1, fixed_g: 3, block_boundary: 1, skip: 1, },
-    { opcode: 0xFF, os: 1, e: 1, fixed_g: 4, block_boundary: 1, skip: 1, },
-    { opcode: 0xFF, os: 1, e: 1, fixed_g: 5, block_boundary: 1, skip: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 4, block_boundary: 1, no_next_instruction: 1, skip: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 5, block_boundary: 1, no_next_instruction: 1, skip: 1, },
     { opcode: 0xFF, custom: 1, os: 1, e: 1, fixed_g: 6, },
 
     { opcode: 0x0F00, fixed_g: 0, e: 1, skip: 1 },
@@ -271,7 +276,7 @@ const encodings = [
     { opcode: 0x0F32, skip: 1 },
     { opcode: 0x0F33, skip: 1 },
     { opcode: 0x0F34, skip: 1, block_boundary: 1, }, // sysenter
-    { opcode: 0x0F35, skip: 1, block_boundary: 1, }, // sysexit
+    { opcode: 0x0F35, skip: 1, block_boundary: 1, no_next_instruction: 1, }, // sysexit
 
     { opcode: 0x0F40, nonfaulting: 1, e: 1, os: 1, },
     { opcode: 0x0F41, nonfaulting: 1, e: 1, os: 1, },
@@ -290,22 +295,22 @@ const encodings = [
     { opcode: 0x0F4E, nonfaulting: 1, e: 1, os: 1, },
     { opcode: 0x0F4F, nonfaulting: 1, e: 1, os: 1, },
 
-    { opcode: 0x0F80, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F81, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F82, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F83, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F84, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F85, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F86, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F87, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F88, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F89, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F8A, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F8B, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F8C, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F8D, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F8E, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
-    { opcode: 0x0F8F, block_boundary: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F80, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F81, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F82, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F83, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F84, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F85, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F86, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F87, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F88, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F89, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F8A, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F8B, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F8C, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F8D, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F8E, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
+    { opcode: 0x0F8F, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm1632: 1, os: 1, custom: 1, skip: 1, },
 
     { opcode: 0x0F90, nonfaulting: 1, e: 1, },
     { opcode: 0x0F91, nonfaulting: 1, e: 1, },
@@ -636,8 +641,8 @@ for(let i = 0; i < 8; i++)
         { opcode: 0x04 | i << 3, nonfaulting: 1, eax: 1, imm8: 1, },
         { opcode: 0x05 | i << 3, nonfaulting: 1, os: 1, eax: 1, imm1632: 1, },
 
-        { opcode: 0x70 | i, block_boundary: 1, imm8s: 1, custom: 1, skip: 1, },
-        { opcode: 0x78 | i, block_boundary: 1, imm8s: 1, custom: 1, skip: 1, },
+        { opcode: 0x70 | i, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm8s: 1, custom: 1, skip: 1, },
+        { opcode: 0x78 | i, block_boundary: 1, jump_offset_imm: 1, conditional_jump: 1, imm8s: 1, custom: 1, skip: 1, },
 
         { opcode: 0x80, nonfaulting: 1, e: 1, fixed_g: i, imm8: 1, },
         { opcode: 0x81, nonfaulting: 1, os: 1, e: 1, fixed_g: i, imm1632: 1, },

+ 3 - 0
src/browser/starter.js

@@ -230,6 +230,9 @@ function V86Starter(options)
             coverage_logger.log(fn_name_offset, num_blocks, visited_block);
         },
         "_log_uncompiled_code": (start, end) => cpu.log_uncompiled_code(start, end),
+        "_dump_function_code": (blocks_ptr, count, end) => {
+            cpu.dump_function_code(blocks_ptr, count, end);
+        },
 
         // see https://github.com/kripken/emscripten/blob/incoming/src/library.js
         "_atan2": Math.atan2,

+ 44 - 13
src/cpu.js

@@ -1256,25 +1256,28 @@ CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_op
         {
             this.debug.dump_wasm(code);
 
-            seen_code[start] = true;
-
-            if((start ^ end) & ~0xFFF)
+            if(false)
             {
-                dbg_log("truncated disassembly start=" + h(start >>> 0) + " end=" + h(end >>> 0));
-                end = (start | 0xFFF) + 1; // until the end of the page
-            }
+                if((start ^ end) & ~0xFFF)
+                {
+                    dbg_log("truncated disassembly start=" + h(start >>> 0) + " end=" + h(end >>> 0));
+                    end = (start | 0xFFF) + 1; // until the end of the page
+                }
 
-            dbg_assert(end >= start);
+                dbg_assert(end >= start);
 
-            const buffer = new Uint8Array(end - start);
+                const buffer = new Uint8Array(end - start);
 
-            for(let i = start; i < end; i++)
-            {
-                buffer[i - start] = this.read8(i);
-            }
+                for(let i = start; i < end; i++)
+                {
+                    buffer[i - start] = this.read8(i);
+                }
 
-            this.debug.dump_code(this.is_32[0] ? 1 : 0, buffer, start);
+                this.debug.dump_code(this.is_32[0] ? 1 : 0, buffer, start);
+            }
         }
+
+        seen_code[start] = (seen_code[start] || 0) + 1;
     }
 
     // Make a copy of jit_imports, since some imports change and
@@ -1339,6 +1342,34 @@ CPU.prototype.log_uncompiled_code = function(start, end)
     }
 };
 
+CPU.prototype.dump_function_code = function(block_ptr, count)
+{
+    const SIZEOF_BASIC_BLOCK_IN_DWORDS = 5;
+
+    const mem32 = new Int32Array(this.wm.memory.buffer);
+
+    dbg_assert((block_ptr & 3) === 0);
+
+    const is_32 = this.is_32[0];
+
+    for(let i = 0; i < count; i++)
+    {
+        const struct_start = (block_ptr >> 2) + i * SIZEOF_BASIC_BLOCK_IN_DWORDS;
+        const start = mem32[struct_start + 0];
+        const end = mem32[struct_start + 1];
+
+        const buffer = new Uint8Array(end - start);
+
+        for(let i = start; i < end; i++)
+        {
+            buffer[i - start] = this.read8(this.translate_address_read(i));
+        }
+
+        this.debug.dump_code(is_32 ? 1 : 0, buffer, start);
+        dbg_log("---");
+    }
+};
+
 CPU.prototype.dbg_log = function()
 {
     dbg_log("from wasm: " + [].join.call(arguments));

+ 81 - 9
src/native/codegen/codegen.c

@@ -58,10 +58,10 @@ void gen_reset(void)
     import_table_size = import_table_size_reset_value;
 }
 
-uintptr_t gen_finish(void)
+uintptr_t gen_finish(int32_t no_of_locals_i32)
 {
     write_memory_import();
-    write_function_section();
+    write_function_section(1);
     write_export_section();
 
     uint8_t* ptr_code_section_size = (uint8_t*) 0; // initialized below
@@ -69,16 +69,20 @@ uintptr_t gen_finish(void)
 
     // write code section preamble
     write_raw_u8(&op, SC_CODE);
+
     ptr_code_section_size = op.ptr; // we will write to this location later
-    write_raw_u8(&op, 0); write_raw_u8(&op, 0); // write temp val for now using 2 bytes
+    write_raw_u8(&op, 0); write_raw_u8(&op, 0); // write temp val for now using 4 bytes
+    write_raw_u8(&op, 0); write_raw_u8(&op, 0);
 
     write_raw_u8(&op, 1); // number of function bodies: just 1
 
     // same as above but for body size of the function
     ptr_fn_body_size = op.ptr;
     write_raw_u8(&op, 0); write_raw_u8(&op, 0);
+    write_raw_u8(&op, 0); write_raw_u8(&op, 0);
 
-    write_raw_u8(&op, 0); // count of locals, none
+    write_raw_u8(&op, 1); // count of local blocks
+    write_raw_u8(&op, no_of_locals_i32); write_raw_u8(&op, TYPE_I32); // no_of_locals_i32 locals of type i32
 
     copy_code_section();
 
@@ -87,8 +91,8 @@ uintptr_t gen_finish(void)
 
     // write the actual sizes to the pointer locations stored above. We subtract 3 from the actual
     // value because the ptr itself points to four bytes
-    write_fixed_leb16_to_ptr(ptr_fn_body_size, ((op.ptr - 1) - ptr_fn_body_size) - 1);
-    write_fixed_leb16_to_ptr(ptr_code_section_size, ((op.ptr - 1) - ptr_code_section_size) - 1);
+    write_fixed_leb32_to_ptr(ptr_fn_body_size, ((op.ptr - 1) - ptr_fn_body_size) - 3);
+    write_fixed_leb32_to_ptr(ptr_code_section_size, ((op.ptr - 1) - ptr_code_section_size) - 3);
 
     return (uintptr_t) op.ptr;
 }
@@ -245,22 +249,90 @@ void gen_fn3(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1, int32_t
     call_fn(&instruction_body, fn_idx);
 }
 
-void gen_if_void()
+void gen_add_i32(void)
+{
+    add_i32(&instruction_body);
+}
+
+void gen_eqz_i32(void)
+{
+    write_raw_u8(&instruction_body, OP_I32EQZ);
+}
+
+void gen_if_void(void)
 {
     write_raw_u8(&instruction_body, OP_IF);
     write_raw_u8(&instruction_body, TYPE_VOID_BLOCK);
 }
 
-void gen_else()
+void gen_else(void)
 {
     write_raw_u8(&instruction_body, OP_ELSE);
 }
 
-void gen_block_end()
+void gen_loop_void(void)
+{
+    write_raw_u8(&instruction_body, OP_LOOP);
+    write_raw_u8(&instruction_body, TYPE_VOID_BLOCK);
+}
+
+void gen_block_void(void)
+{
+    write_raw_u8(&instruction_body, OP_BLOCK);
+    write_raw_u8(&instruction_body, TYPE_VOID_BLOCK);
+}
+
+void gen_block_end(void)
 {
     write_raw_u8(&instruction_body, OP_END);
 }
 
+void gen_return(void)
+{
+    write_raw_u8(&instruction_body, OP_RETURN);
+}
+
+// Generate a br_table where an input of [i] will branch to the [i]th outer block,
+// where [i] is passed on the wasm stack
+void gen_switch(int32_t cases_count)
+{
+    write_raw_u8(&instruction_body, OP_BRTABLE);
+    write_leb_u32(&instruction_body, cases_count);
+
+    for(int32_t i = 0; i < cases_count + 1; i++)
+    {
+        write_leb_u32(&instruction_body, i);
+    }
+}
+
+void gen_br(int32_t depth)
+{
+    write_raw_u8(&instruction_body, OP_BR);
+    write_leb_i32(&instruction_body, depth);
+}
+
+void gen_get_local(int32_t idx)
+{
+    write_raw_u8(&instruction_body, OP_GETLOCAL);
+    write_leb_i32(&instruction_body, idx);
+}
+
+void gen_set_local(int32_t idx)
+{
+    write_raw_u8(&instruction_body, OP_SETLOCAL);
+    write_leb_i32(&instruction_body, idx);
+}
+
+void gen_const_i32(int32_t v)
+{
+    push_i32(&instruction_body, v);
+}
+
+void gen_unreachable(void)
+{
+    write_raw_u8(&instruction_body, OP_UNREACHABLE);
+}
+
 #define MODRM_ENTRY(n, work)\
     case (n) | 0 << 3:\
     case (n) | 1 << 3:\

+ 18 - 1
src/native/codegen/codegen.h

@@ -21,7 +21,7 @@ static uint8_t const fn_get_seg_idx = 0;
 
 void gen_init(void);
 void gen_reset(void);
-uintptr_t gen_finish(void);
+uintptr_t gen_finish(int32_t no_of_locals_i32);
 uintptr_t gen_get_final_offset(void);
 
 int32_t get_fn_index(char const* fn, uint8_t fn_len, uint8_t type_index);
@@ -37,9 +37,26 @@ void gen_fn3(char const* fn, uint8_t fn_len, int32_t arg0, int32_t arg1, int32_t
 
 void gen_fn0_ret(char const* fn, uint8_t fn_len);
 
+void gen_add_i32(void);
+void gen_eqz_i32(void);
+
 void gen_if_void(void);
 void gen_else(void);
+void gen_loop_void(void);
+void gen_block_void(void);
 void gen_block_end(void);
+void gen_return(void);
+
+void gen_switch(int32_t);
+
+void gen_br(int32_t depth);
+
+void gen_const_i32(int32_t);
+
+void gen_get_local(int32_t);
+void gen_set_local(int32_t);
+
+void gen_unreachable(void);
 
 void gen_modrm_resolve(int32_t modrm_byte);
 void gen_modrm_fn0(char const* fn, uint8_t fn_len);

+ 8 - 4
src/native/codegen/module_init.h

@@ -158,12 +158,16 @@ static uint8_t write_import_entry(char const* fn_name, uint8_t fn_name_len, uint
     return *ptr_import_count - 1;
 }
 
-static void write_function_section()
+static void write_function_section(int32_t count)
 {
     write_raw_u8(&op, SC_FUNCTION);
-    write_raw_u8(&op, 2); // length of this section
-    write_raw_u8(&op, 1); // count of signature indices
-    write_raw_u8(&op, FN0_TYPE_INDEX); // we export one function which is nullary
+    write_raw_u8(&op, 1 + count); // length of this section
+    write_raw_u8(&op, count); // count of signature indices
+
+    for(int32_t i = 0; i < count; i++)
+    {
+        write_raw_u8(&op, FN0_TYPE_INDEX);
+    }
 }
 
 static void write_export_section()

+ 10 - 1
src/native/codegen/util.h

@@ -67,10 +67,19 @@ static void write_leb_u32(Buffer* buf, uint32_t v)
 static void inline write_fixed_leb16_to_ptr(uint8_t* ptr, uint16_t x)
 {
     dbg_assert(x < (1 << 14)); // we have 14 bits of available space in 2 bytes for leb
-    *ptr = (x & 0b1111111) | 0b10000000;
+    *(ptr    ) = (x & 0b1111111) | 0b10000000;
     *(ptr + 1) = x >> 7;
 }
 
+static void inline write_fixed_leb32_to_ptr(uint8_t* ptr, uint32_t x)
+{
+    dbg_assert(x < (1 << 28)); // we have 28 bits of available space in 4 bytes for leb
+    *(ptr    ) = (x       & 0b1111111) | 0b10000000;
+    *(ptr + 1) = (x >> 7  & 0b1111111) | 0b10000000;
+    *(ptr + 2) = (x >> 14 & 0b1111111) | 0b10000000;
+    *(ptr + 3) = (x >> 21 & 0b1111111);
+}
+
 static void append_buffer(Buffer *dest, Buffer *src)
 {
     assert(dest->len - (dest->ptr - dest->start) >= (src->ptr - src->start));

+ 2 - 3
src/native/const.h

@@ -172,8 +172,6 @@
 
 #define MXCSR_MASK (0xFFFF & ~(1 << 6))
 
-#define JIT_MIN_BLOCK_LENGTH 3
-
 #define WASM_TABLE_SIZE 0x10000
 
 // Mask used to map physical address to index in cache array
@@ -194,8 +192,9 @@
 #define JIT_NEXT_BLOCK_BRANCHED_IDX 0
 #define JIT_NEXT_BLOCK_NOT_BRANCHED_IDX 1
 
+#define JIT_MAX_ITERATIONS_PER_FUNCTION 10000
+
 #define ENABLE_JIT 1
-#define ENABLE_JIT_BLOCK_LINKING 1
 #define ENABLE_JIT_NONFAULTING_OPTIMZATION 1
 
 #ifndef ENABLE_JIT_ALWAYS

+ 552 - 88
src/native/cpu.c

@@ -524,6 +524,12 @@ int32_t modrm_resolve(int32_t modrm_byte)
     }
 }
 
+void modrm_skip(int32_t modrm_byte)
+{
+    // TODO: More efficient implementation is possible
+    modrm_resolve(modrm_byte);
+}
+
 uint32_t jit_hot_hash(uint32_t addr)
 {
     return addr % HASH_PRIME;
@@ -634,38 +640,32 @@ static bool is_near_end_of_page(uint32_t addr)
     return (addr & 0xFFF) >= (0x1000 - 16);
 }
 
-static void jit_generate(int32_t address_hash, uint32_t phys_addr, uint32_t page_dirtiness)
+static bool same_page(int32_t addr1, int32_t addr2)
 {
-    profiler_start(P_GEN_INSTR);
-    profiler_stat_increment(S_COMPILE);
-
-    int32_t virt_start_addr = *instruction_pointer;
-
-    // don't immediately retry to compile
-    hot_code_addresses[address_hash] = 0;
+    return (addr1 & ~0xFFF) == (addr2 & ~0xFFF);
+}
 
+static void jit_generate_basic_block(int32_t start_addr, int32_t stop_addr)
+{
     uint32_t len = 0;
     jit_block_boundary = false;
 
     int32_t end_addr;
-    int32_t first_opcode = -1;
     bool was_block_boundary = false;
     int32_t eip_delta = 0;
 
-    gen_reset();
+    *instruction_pointer = start_addr;
+    uint32_t phys_addr = translate_address_read(start_addr);
 
     // First iteration of do-while assumes the caller confirms this condition
-    assert(!is_near_end_of_page(phys_addr));
+    assert(!is_near_end_of_page(start_addr));
+    UNUSED(phys_addr);
+
     do
     {
         *previous_ip = *instruction_pointer;
         int32_t opcode = read_imm8();
 
-        if(len == 0)
-        {
-            first_opcode = opcode;
-        }
-
         int32_t start_eip = *instruction_pointer - 1;
         jit_instr_flags jit_ret = jit_instruction(opcode | !!*is_32 << 8);
         int32_t end_eip = *instruction_pointer;
@@ -724,7 +724,9 @@ static void jit_generate(int32_t address_hash, uint32_t phys_addr, uint32_t page
         end_addr = *eip_phys ^ *instruction_pointer;
         len++;
     }
-    while(!was_block_boundary && !is_near_end_of_page(*instruction_pointer));
+    while(!was_block_boundary &&
+            !is_near_end_of_page(*instruction_pointer) &&
+            *instruction_pointer != stop_addr);
 
 #if ENABLE_JIT_NONFAULTING_OPTIMZATION
     // When the block ends in a non-jump instruction, we may have uncommitted updates still
@@ -735,50 +737,13 @@ static void jit_generate(int32_t address_hash, uint32_t phys_addr, uint32_t page
     }
 #endif
 
+    gen_increment_timestamp_counter(len);
+
     // no page was crossed
     assert(((end_addr ^ phys_addr) & ~0xFFF) == 0);
 
     jit_block_boundary = false;
     assert(*prefixes == 0);
-
-    // at this point no exceptions can be raised
-
-    if(!ENABLE_JIT_ALWAYS && JIT_MIN_BLOCK_LENGTH != 0 && len < JIT_MIN_BLOCK_LENGTH)
-    {
-        // abort, block is too short to be considered useful for compilation
-        profiler_stat_increment(S_CACHE_SKIPPED);
-        profiler_end(P_GEN_INSTR);
-        *instruction_pointer = virt_start_addr;
-        return;
-    }
-
-    gen_increment_timestamp_counter(len);
-    gen_finish();
-
-    struct code_cache* entry = create_cache_entry(phys_addr);
-
-    entry->start_addr = phys_addr;
-    entry->state_flags = pack_current_state_flags();
-    entry->group_status = page_dirtiness;
-    entry->pending = true;
-
-#if DEBUG
-    assert(first_opcode != -1);
-    entry->opcode[0] = first_opcode;
-    entry->end_addr = end_addr;
-    entry->len = len;
-#endif
-
-    // will call codegen_finalize_finished asynchronously when finished
-    codegen_finalize(
-            entry->wasm_table_index, phys_addr, end_addr,
-            first_opcode, entry->state_flags, page_dirtiness);
-
-    // start execution at the beginning
-    *instruction_pointer = virt_start_addr;
-
-    profiler_stat_increment(S_COMPILE_SUCCESS);
-    profiler_end(P_GEN_INSTR);
 }
 
 void codegen_finalize_finished(
@@ -795,8 +760,13 @@ void codegen_finalize_finished(
         // sanity check that we're looking at the right entry
         assert(entry->pending);
         assert(entry->group_status == page_dirtiness);
-        assert(entry->state_flags == state_flags);
         assert(entry->start_addr == phys_addr);
+        assert(entry->state_flags == state_flags);
+        UNUSED(page_dirtiness);
+        UNUSED(phys_addr);
+        UNUSED(state_flags);
+        UNUSED(end_addr);
+        UNUSED(first_opcode);
 
         entry->pending = false;
     }
@@ -808,13 +778,15 @@ void codegen_finalize_finished(
 
 static struct code_cache* find_cache_entry(uint32_t phys_addr)
 {
+    cached_state_flags state_flags = pack_current_state_flags();
+
 #pragma clang loop unroll_count(CODE_CACHE_SEARCH_SIZE)
     for(int32_t i = 0; i < CODE_CACHE_SEARCH_SIZE; i++)
     {
         uint16_t addr_index = (phys_addr + i) & JIT_PHYS_MASK;
         struct code_cache* entry = &jit_cache_arr[addr_index];
 
-        if(entry->start_addr == phys_addr && entry->state_flags == pack_current_state_flags())
+        if(entry->start_addr == phys_addr && entry->state_flags == state_flags)
         {
             return entry;
         }
@@ -827,7 +799,7 @@ struct code_cache* find_link_block_target(int32_t target)
 {
     int32_t eip = *previous_ip;
 
-    if(ENABLE_JIT_BLOCK_LINKING && ((eip ^ target) & ~0xFFF) == 0) // same page
+    if(same_page(eip, target))
     {
         assert((eip & ~0xFFF) == *last_virt_eip);
         assert((target & ~0xFFF) == *last_virt_eip);
@@ -846,54 +818,543 @@ struct code_cache* find_link_block_target(int32_t target)
     return NULL;
 }
 
-void jit_link_block(int32_t target)
-{
-    struct code_cache* entry = find_link_block_target(target);
-
-    if(entry)
-    {
-        profiler_stat_increment(S_COMPILE_WITH_LINK);
-        set_jit_import(JIT_NEXT_BLOCK_BRANCHED_IDX, entry->wasm_table_index);
-        gen_fn0(JIT_NEXT_BLOCK_BRANCHED, sizeof(JIT_NEXT_BLOCK_BRANCHED) - 1);
-    }
-}
-
 void jit_link_block_conditional(int32_t offset, const char* condition)
 {
-    // > Generate the following code:
-    // if(test_XX()) { *instruction_pointer += offset; JIT_NEXT_BLOCK_BRANCHED() }
-    // else { JIT_NEXT_BLOCK_NOT_BRANCHED() }
-
     // Note: block linking cannot rely on the absolute value of eip, as blocks
     // are stored at their *physical* address, which can be executed from
     // multiple *virtual* addresses. Due to this, we cannot insert the value of
     // eip into generated code
 
-    struct code_cache* entry_branch_taken = find_link_block_target(*instruction_pointer + offset);
-    struct code_cache* entry_branch_not_taken = find_link_block_target(*instruction_pointer);
-
     gen_fn0_ret(condition, strlen(condition));
 
     gen_if_void();
     gen_relative_jump(offset);
+    gen_block_end();
+}
 
-    if(entry_branch_taken)
+struct analysis analyze_prefix_instruction()
+{
+    return analyze_step(read_imm8() | is_osize_32() << 8);
+}
+
+struct analysis instr_26_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_2E_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_36_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_3E_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_64_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_65_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_66_analyze() {
+    *prefixes |= PREFIX_MASK_OPSIZE;
+    struct analysis result = analyze_prefix_instruction();
+    *prefixes = 0;
+    return result;
+}
+struct analysis instr_67_analyze() {
+    *prefixes |= PREFIX_MASK_ADDRSIZE;
+    struct analysis result = analyze_prefix_instruction();
+    *prefixes = 0;
+    return result;
+}
+struct analysis instr_F0_analyze() { return analyze_prefix_instruction(); }
+struct analysis instr_F2_analyze() {
+    *prefixes |= PREFIX_F2;
+    struct analysis result = analyze_prefix_instruction();
+    *prefixes = 0;
+    return result;
+}
+struct analysis instr_F3_analyze() {
+    *prefixes |= PREFIX_F3;
+    struct analysis result = analyze_prefix_instruction();
+    *prefixes = 0;
+    return result;
+}
+
+struct analysis instr16_0F_analyze()
+{
+    int32_t opcode = read_imm8();
+    struct analysis analysis = { .flags = 0, .jump_target = 0, .condition_index = -1 };
+#include "../../build/analyzer0f_16.c"
+    return analysis;
+}
+struct analysis instr32_0F_analyze()
+{
+    int32_t opcode = read_imm8();
+    struct analysis analysis = { .flags = 0, .jump_target = 0, .condition_index = -1 };
+#include "../../build/analyzer0f_32.c"
+    return analysis;
+}
+
+struct analysis analyze_step(int32_t opcode)
+{
+    struct analysis analysis = { .flags = 0, .jump_target = 0, .condition_index = -1 };
+#include "../../build/analyzer.c"
+    return analysis;
+}
+
+int32_t find_basic_block_index(const struct basic_block_list* basic_blocks, int32_t addr)
+{
+    for(int32_t i = 0; i < basic_blocks->length; i++)
+    {
+        if(basic_blocks->blocks[i].addr == addr)
+        {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+bool is_sorted_and_unique(const struct basic_block_list* basic_blocks)
+{
+    for(int32_t i = 0; i < basic_blocks->length - 1; i++)
     {
-        profiler_stat_increment(S_COMPILE_WITH_LINK);
-        set_jit_import(JIT_NEXT_BLOCK_BRANCHED_IDX, entry_branch_taken->wasm_table_index);
-        gen_fn0(JIT_NEXT_BLOCK_BRANCHED, sizeof(JIT_NEXT_BLOCK_BRANCHED) - 1);
+        if(basic_blocks->blocks[i].addr >= basic_blocks->blocks[i + 1].addr)
+        {
+            return false;
+        }
     }
 
-    if(entry_branch_not_taken)
+    return true;
+}
+
+struct basic_block* add_basic_block_start(struct basic_block_list* basic_blocks, int32_t addr)
+{
+    assert(basic_blocks->length < BASIC_BLOCK_LIST_MAX);
+    assert(find_basic_block_index(basic_blocks, addr) == -1);
+
+    // sorted insert
+    int32_t index_to_insert = -1;
+
+    for(int32_t i = 0; i < basic_blocks->length; i++)
     {
-        gen_else();
+        assert(basic_blocks->blocks[i].addr != addr);
 
-        profiler_stat_increment(S_COMPILE_WITH_LINK);
-        set_jit_import(JIT_NEXT_BLOCK_NOT_BRANCHED_IDX, entry_branch_not_taken->wasm_table_index);
-        gen_fn0(JIT_NEXT_BLOCK_NOT_BRANCHED, sizeof(JIT_NEXT_BLOCK_NOT_BRANCHED) - 1);
+        if(basic_blocks->blocks[i].addr > addr)
+        {
+            // make space
+            for(int32_t j = basic_blocks->length - 1; j >= i; j--)
+            {
+                basic_blocks->blocks[j + 1] = basic_blocks->blocks[j];
+            }
+
+            assert(basic_blocks->blocks[i].addr == basic_blocks->blocks[i + 1].addr);
+
+            index_to_insert = i;
+            break;
+        }
+    }
+
+    if(index_to_insert == -1)
+    {
+        // if we reach this point, addr is larger than all existing basic blocks, or the list is empty
+        assert(basic_blocks->length == 0 ||
+                basic_blocks->blocks[basic_blocks->length - 1].addr < addr);
+
+        index_to_insert = basic_blocks->length;
     }
 
+    basic_blocks->blocks[index_to_insert].addr = addr;
+    basic_blocks->length++;
+
+    assert(is_sorted_and_unique(basic_blocks));
+
+    if(index_to_insert != 0)
+    {
+        struct basic_block* previous_block = &basic_blocks->blocks[index_to_insert - 1];
+
+        if(previous_block->end_addr > addr)
+        {
+            // Split the previous block as it would overlap otherwise; change
+            // it to continue at this block
+
+            previous_block->end_addr = addr;
+            previous_block->next_block_addr = addr;
+            previous_block->condition_index = -1;
+            previous_block->next_block_branch_taken_addr = 0;
+        }
+    }
+
+    return &basic_blocks->blocks[index_to_insert];
+}
+
+static const char* condition_functions[] = {
+    "test_o",
+    "test_no",
+    "test_b",
+    "test_nb",
+    "test_z",
+    "test_nz",
+    "test_be",
+    "test_nbe",
+    "test_s",
+    "test_ns",
+    "test_p",
+    "test_np",
+    "test_l",
+    "test_nl",
+    "test_le",
+    "test_nle",
+};
+
+struct basic_block_list basic_blocks = {
+    .length = 0,
+    .blocks = {
+        {
+            .addr = 0,
+            .end_addr = 0,
+            .next_block_addr = 0,
+            .next_block_branch_taken_addr = 0,
+            .condition_index = 0,
+        }
+    }
+};
+int32_t to_visit_stack[1000];
+
+// populates the basic_blocks global variable
+static void jit_find_basic_blocks()
+{
+    int32_t start = *instruction_pointer;
+
+    basic_blocks.length = 0;
+
+    // keep a stack of locations to visit that are part of the current control flow
+
+    int32_t to_visit_stack_count = 0;
+    to_visit_stack[to_visit_stack_count++] = *instruction_pointer;
+
+    while(to_visit_stack_count)
+    {
+        int32_t to_visit = to_visit_stack[--to_visit_stack_count];
+
+        assert((*instruction_pointer & ~0xFFF) == (to_visit & ~0xFFF));
+        *instruction_pointer = *instruction_pointer & ~0xFFF | to_visit & 0xFFF;
+
+        if(find_basic_block_index(&basic_blocks, *instruction_pointer) != -1)
+        {
+            // been here already, next
+            continue;
+        }
+
+        struct basic_block* current_block = add_basic_block_start(&basic_blocks, *instruction_pointer);
+
+        current_block->next_block_branch_taken_addr = 0;
+
+        while(true)
+        {
+            int32_t phys_eip = translate_address_read(*instruction_pointer);
+
+            if(is_near_end_of_page(phys_eip))
+            {
+                current_block->next_block_branch_taken_addr = 0;
+                current_block->next_block_addr = 0;
+                current_block->end_addr = *instruction_pointer;
+                current_block->condition_index = -1;
+                break;
+            }
+
+            assert(!in_mapped_range(phys_eip));
+            int32_t opcode = mem8[phys_eip];
+            (*instruction_pointer)++;
+            struct analysis analysis = analyze_step(opcode | is_osize_32() << 8);
+
+            assert(*prefixes == 0);
+
+            int32_t jump_target = analysis.jump_target;
+
+            if((analysis.flags & JIT_INSTR_BLOCK_BOUNDARY_FLAG) == 0)
+            {
+                // ordinary instruction, continue at next
+
+                if(find_basic_block_index(&basic_blocks, *instruction_pointer) != -1)
+                {
+                    current_block->next_block_branch_taken_addr = 0;
+                    assert(*instruction_pointer);
+                    current_block->next_block_addr = *instruction_pointer;
+                    current_block->end_addr = *instruction_pointer;
+                    current_block->condition_index = -1;
+                    break;
+                }
+            }
+            else if(jump_target && analysis.condition_index == -1)
+            {
+                // non-conditional jump: continue at jump target
+
+                if(same_page(jump_target, *instruction_pointer))
+                {
+                    assert(jump_target);
+                    current_block->next_block_addr = jump_target;
+
+                    assert(to_visit_stack_count != 1000);
+                    to_visit_stack[to_visit_stack_count++] = jump_target;
+                }
+                else
+                {
+                    current_block->next_block_addr = 0;
+                }
+
+                current_block->next_block_branch_taken_addr = 0;
+                current_block->condition_index = -1;
+                current_block->end_addr = *instruction_pointer;
+
+                break;
+            }
+            else if(jump_target && analysis.condition_index != -1)
+            {
+                // conditional jump: continue at next and continue at jump target
+
+                assert(to_visit_stack_count != 1000);
+                to_visit_stack[to_visit_stack_count++] = *instruction_pointer;
+
+                if(same_page(jump_target, *instruction_pointer))
+                {
+                    assert(to_visit_stack_count != 1000);
+                    to_visit_stack[to_visit_stack_count++] = jump_target;
+
+                    assert(jump_target);
+                    current_block->next_block_branch_taken_addr = jump_target;
+                }
+                else
+                {
+                    current_block->next_block_branch_taken_addr = 0;
+                }
+
+                assert(*instruction_pointer);
+                current_block->next_block_addr = *instruction_pointer;
+                current_block->end_addr = *instruction_pointer;
+
+                assert(analysis.condition_index >= 0 && analysis.condition_index < 0x10);
+                current_block->condition_index = analysis.condition_index;
+
+                break;
+            }
+            else
+            {
+                // a block boundary but not a jump, get out
+
+                assert((analysis.flags & JIT_INSTR_BLOCK_BOUNDARY_FLAG) && !jump_target);
+
+                current_block->next_block_branch_taken_addr = 0;
+                current_block->next_block_addr = 0;
+                current_block->condition_index = -1;
+                current_block->end_addr = *instruction_pointer;
+                break;
+            }
+        }
+    }
+
+    if(DEBUG)
+    {
+        int32_t end = basic_blocks.blocks[basic_blocks.length - 1].end_addr;
+
+        dbg_log("Function with %d basic blocks, start at %x end at %x",
+                basic_blocks.length, start, end);
+
+        //for(int32_t i = 0; i < basic_blocks.length; i++)
+        //{
+        //    dbg_log("%x", basic_blocks.blocks[i].addr);
+        //}
+
+        dump_function_code(basic_blocks.blocks, basic_blocks.length, end);
+    }
+}
+
+static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
+{
+    profiler_stat_increment(S_COMPILE);
+    profiler_start(P_GEN_INSTR);
+
+    int32_t start = *instruction_pointer;
+
+    int32_t first_opcode = read8(get_phys_eip());
+
+    // populate basic_blocks
+    jit_find_basic_blocks();
+
+    // Code generation starts here
+
+    // local variables used by the generated wasm module
+    const int32_t STATE = 0;
+    const int32_t ITERATION_COUNTER = 1;
+
+    const int32_t NO_OF_LOCALS = 2;
+
+    gen_reset();
+
+    {
+        int32_t first_basic_block_index = find_basic_block_index(&basic_blocks, start);
+        assert(first_basic_block_index != -1);
+
+        // Set state variable to first basic block; in most cases the first
+        // basic block, but a jump may lead to before the function start, which
+        // is currently accepted as long as it is in the same page
+        gen_const_i32(first_basic_block_index);
+        gen_set_local(0);
+    }
+
+    // initialise max_iterations
+    gen_const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION);
+    gen_set_local(ITERATION_COUNTER);
+
+    // main state machine loop
+    gen_loop_void();
+
+    // decrement max_iterations
+    gen_get_local(ITERATION_COUNTER);
+    gen_const_i32(-1);
+    gen_add_i32();
+    gen_set_local(ITERATION_COUNTER);
+
+
+    // if max_iterations == 0: return
+    gen_get_local(ITERATION_COUNTER);
+    gen_eqz_i32();
+    gen_if_void();
+    gen_return();
     gen_block_end();
+
+    gen_block_void(); // for the default case
+
+    // generate the opening blocks for the cases
+
+    for(int32_t i = 0; i < basic_blocks.length; i++)
+    {
+        gen_block_void();
+    }
+
+    gen_get_local(STATE);
+    gen_switch(basic_blocks.length);
+
+    for(int32_t i = 0; i < basic_blocks.length; i++)
+    {
+        // Case [i] will jump after the [i]th block, so we first generate the
+        // block end opcode and then the code for that block
+
+        gen_block_end();
+
+        struct basic_block block = basic_blocks.blocks[i];
+
+        int32_t next_block_start = block.end_addr;
+
+        if(block.addr == next_block_start)
+        {
+            // Empty basic block, generate no code (for example, jump to block
+            // that is near end of page)
+            assert(!block.next_block_addr);
+        }
+        else
+        {
+            gen_commit_instruction_body_to_cs();
+            jit_generate_basic_block(block.addr, next_block_start);
+            gen_commit_instruction_body_to_cs();
+        }
+
+        if(block.next_block_addr)
+        {
+            if(block.condition_index == -1)
+            {
+                assert(!block.next_block_branch_taken_addr);
+
+                // Unconditional jump to next basic block
+                // - All instructions that don't change eip
+                // - Unconditional jump
+
+                int32_t next_bb_index = find_basic_block_index(&basic_blocks, block.next_block_addr);
+                assert(next_bb_index != -1);
+
+                // set state variable to next basic block
+                gen_const_i32(next_bb_index);
+                gen_set_local(STATE);
+
+                gen_br(basic_blocks.length - i); // to the loop
+            }
+            else
+            {
+                // Conditional jump to next basic block
+                // - jnz, jc, etc.
+
+                assert(block.condition_index >= 0 && block.condition_index < 16);
+                const char* condition = condition_functions[block.condition_index];
+                gen_fn0_ret(condition, strlen(condition));
+
+                gen_if_void();
+
+                if(block.next_block_branch_taken_addr)
+                {
+                    // Branch taken
+                    int32_t next_basic_block_branch_taken_index = find_basic_block_index(
+                            &basic_blocks, block.next_block_branch_taken_addr);
+                    assert(next_basic_block_branch_taken_index != -1);
+
+                    gen_const_i32(next_basic_block_branch_taken_index);
+                    gen_set_local(STATE);
+                }
+                else
+                {
+                    // Jump to different page
+                    gen_return();
+                }
+
+                gen_else();
+
+                {
+                    // Branch not taken
+                    // TODO: Could use fall-through here
+                    int32_t next_basic_block_index = find_basic_block_index(
+                            &basic_blocks, block.next_block_addr);
+                    assert(next_basic_block_index != -1);
+
+                    gen_const_i32(next_basic_block_index);
+                    gen_set_local(STATE);
+                }
+
+                gen_block_end();
+
+                gen_br(basic_blocks.length - i); // to the loop
+            }
+        }
+        else
+        {
+            assert(!block.next_block_branch_taken_addr);
+            assert(block.condition_index == -1);
+
+            // Exit this function
+            gen_return();
+        }
+    }
+
+    gen_block_end(); // default case
+    gen_unreachable();
+
+    gen_block_end(); // loop
+
+    gen_commit_instruction_body_to_cs();
+    gen_finish(NO_OF_LOCALS);
+
+    struct code_cache* entry = create_cache_entry(phys_addr);
+
+    entry->start_addr = phys_addr;
+    entry->state_flags = pack_current_state_flags();
+    entry->group_status = page_dirtiness;
+    entry->pending = true;
+
+#if DEBUG
+    entry->opcode[0] = first_opcode;
+    //entry->end_addr = end_addr;
+    //entry->len = len;
+#endif
+    UNUSED(first_opcode);
+
+    int32_t end_addr = 0;
+
+    // will call codegen_finalize_finished asynchronously when finished
+    codegen_finalize(
+            entry->wasm_table_index, phys_addr, end_addr,
+            first_opcode, entry->state_flags, page_dirtiness);
+
+    profiler_stat_increment(S_COMPILE_SUCCESS);
+    profiler_end(P_GEN_INSTR);
+
+    *instruction_pointer = start;
 }
 
 void cycle_internal()
@@ -958,7 +1419,10 @@ void cycle_internal()
             )
           )
         {
-            jit_generate(address_hash, phys_addr, page_dirtiness);
+            // don't immediately retry compiling this address
+            hot_code_addresses[address_hash] = 0;
+
+            jit_generate(phys_addr, page_dirtiness);
         }
         else
         {

+ 35 - 1
src/native/cpu.h

@@ -5,7 +5,6 @@
 #include <stdint.h>
 
 #include "const.h"
-#include "instructions.h"
 #include "shared.h"
 
 #define CODE_CACHE_SEARCH_SIZE 8
@@ -65,6 +64,32 @@ struct code_cache jit_cache_arr[WASM_TABLE_SIZE];
 // state-altering, etc.)
 extern uint32_t jit_block_boundary;
 
+typedef uint32_t jit_instr_flags;
+
+#define JIT_INSTR_BLOCK_BOUNDARY_FLAG (1 << 0)
+#define JIT_INSTR_NONFAULTING_FLAG (1 << 1)
+
+struct analysis {
+    jit_instr_flags flags;
+    int32_t jump_target;
+    int32_t condition_index;
+};
+
+struct basic_block {
+    int32_t addr;
+    int32_t end_addr;
+    int32_t next_block_addr; // if 0, this is an exit block (no fall-through successor)
+    int32_t next_block_branch_taken_addr;
+    int32_t condition_index; // if not -1, this block ends with a conditional jump
+};
+
+#define BASIC_BLOCK_LIST_MAX 1000
+
+struct basic_block_list {
+    int32_t length;
+    struct basic_block blocks[BASIC_BLOCK_LIST_MAX];
+};
+
 // Count of how many times prime_hash(address) has been called through a jump
 extern int32_t hot_code_addresses[HASH_PRIME];
 // An array indicating the current "initial group status" for entries that map
@@ -85,6 +110,13 @@ int32_t valid_tlb_entries_count;
 extern void call_indirect(int32_t index);
 
 void after_block_boundary(void);
+struct analysis analyze_step(int32_t);
+
+void after_jump(void);
+void diverged(void);
+void branch_taken(void);
+void branch_not_taken(void);
+
 int32_t get_eflags(void);
 uint32_t translate_address_read(int32_t address);
 uint32_t translate_address_write(int32_t address);
@@ -103,6 +135,8 @@ int32_t get_seg_prefix_ds(int32_t offset);
 int32_t get_seg_prefix_ss(int32_t offset);
 int32_t get_seg_prefix_cs(int32_t offset);
 int32_t modrm_resolve(int32_t modrm_byte);
+void modrm_skip(int32_t modrm_byte);
+
 uint32_t jit_hot_hash(uint32_t addr);
 void jit_link_block(int32_t target);
 void jit_link_block_conditional(int32_t offset, const char* condition);

+ 0 - 9
src/native/instructions.c

@@ -1383,9 +1383,6 @@ void instr16_E8_jit(int32_t imm16) {
 
 void instr32_E8_jit(int32_t imm32s) {
     gen_fn1("instr32_E8", 10, imm32s);
-
-    int32_t target = *instruction_pointer + imm32s;
-    jit_link_block(target);
 }
 
 void instr16_E9(int32_t imm16) {
@@ -1402,9 +1399,6 @@ void instr16_E9_jit(int32_t imm16) {
 }
 void instr32_E9_jit(int32_t imm32s) {
     gen_fn1("instr32_E9", 10, imm32s);
-
-    int32_t target = *instruction_pointer + imm32s;
-    jit_link_block(target);
 }
 
 void instr16_EA(int32_t new_ip, int32_t cs) {
@@ -1426,9 +1420,6 @@ void instr_EB(int32_t imm8) {
 
 void instr_EB_jit(int32_t imm8s) {
     gen_fn1("instr_EB", 8, imm8s);
-
-    int32_t target = *instruction_pointer + imm8s;
-    jit_link_block(target);
 }
 
 void instr_EC() {

+ 1 - 5
src/native/instructions.h

@@ -1,12 +1,8 @@
 #pragma once
 
+#include "cpu.h"
 #include <stdint.h>
 
-typedef uint32_t jit_instr_flags;
-
-#define JIT_INSTR_BLOCK_BOUNDARY_FLAG (1 << 0)
-#define JIT_INSTR_NONFAULTING_FLAG (1 << 1)
-
 #define SAFE_READ_WRITE8(addr, fun) \
     int32_t phys_addr = translate_address_write(addr); \
     int32_t ___ = read8(phys_addr); \

+ 5 - 0
src/native/js_imports.h

@@ -5,6 +5,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "cpu.h"
+
 // like memcpy, but only efficient for large (approximately 10k) sizes
 // See memcpy in https://github.com/kripken/emscripten/blob/master/src/library.js
 extern void* memcpy_large(void* dest, const void* src, size_t n);
@@ -22,8 +24,11 @@ extern int32_t mmap_read8(uint32_t);
 extern int32_t set_cr0(int32_t);
 extern int32_t verr(int32_t);
 extern int32_t verw(int32_t);
+
 extern void codegen_finalize(int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
 extern void log_uncompiled_code(int32_t, int32_t);
+extern void dump_function_code(const struct basic_block* basic_block, int32_t basic_block_count, int32_t end);
+
 extern void cpl_changed(void);
 extern void cpuid(void);
 extern void enter16(int32_t, int32_t);