Browse Source

Squash

2e469796 Minor
fab422ef Improve generation of 0f instructions
08ad7fe9 Improved if-else generation
3f81014d Minor: Align test output
4a3a84ef Generate modrm tests
61aa1875 Simplify
a6e47954 Generate decoding of immediate operands
435b2c10 Fix warnings
e4933042 Add missing immediate operand
3f3810c7 Generate immediate operands for instructions with modrm byte
a0aa7b1f Make memory layout in nasm tests clearer
6b8ef212 Remove 'g' property from instruction table (implied by 'e')
bf15c58c Remove unused declarations
1e543035 Remove useless `| 0` and `>>> 0` javascriptisms
1ccc5d53 Fix headers
8b40c532 Update qemu tests with changes from qemu
ec9b0fb5 Port xchg instructions to C
c73613e7 Port virt_boundary_* to C
d61d1241 Add headers
fd19f22c Make written value in write8 and write16 int32_t
497dcaec Generate read_imm for instructions with a modrm byte
8b7003d6 Generate read_imm8s
0cc75498 Remove read_op
9d716086 Trigger unimplemented_sse for partial sse instructions with prefix
8d5edd03 Remove unimplmented sse c-to-js hack
585d3565 Remove | 0
308124b2 Use int32_t as return value
f193f8e1 Use JS version of cvttsd2si for now
12747b97 Generate trigger_ud for missing modrm branches
770f674e Split 0f00 and 0f01 into multiple instructions depending on modrm bits
1cb372a3 Generate decoder for some 0f-prefix instructions
cec7bc63 Disable unused parameter warnings in instruction functions
807665b1 Generate read_imm for 0f/jmpcc
cdf6eccc Generate modrm decoding for shld
04528429 Create temporary files in /tmp/, not cwd
d8f3fbd8 Generate modrm/imm decoding for shld
00ef0942 Generate modrm decoding for bts
f531984b Generate modrm decoding for shrd and imul
07569c53 Generate modrm decoding cmpxchg
535ff190 Generate modrm decoding for lfs/lgs/lss
2f8ced8d Generate modrm decoding for btr and btc
95de6c66 Generate modrm decoding for movzx
c4d07e7e Generate modrm decoding for bsf and bsr
f0985d26 Generate modrm decoding for movsx
4b30937a Generate modrm decoding for xadd
a422eb27 Generate modrm decoding for movnti
e5501d3c Generate modrm decoding for mov to/from control/debug registers
bce11ec5 Generate modrm decoding for lar/lsl
5729a23c Fix access to DR4 and DR5 when cr4.DE is clear
44269a81 Specify immediate size explicitly instead of inferring it
82b2867a Fix STR instruction
98a9cc89 Log failing assertion
6d2f9964 Fix rdtsc
00260694 Log GP exceptions
7916883d Port trigger_ud and trigger_nm to C
36fedae9 Remove unused code
e08fabd0 Generate modrm decoding for 0f00 and 0f01
8ae8174d Generate modrm decoding for 0fae and 0fc7 (fxsave, cmpxchg8, etc.)
26168164 Generate modrm+immediate decoding for 0fba (bit test with immediate operand)
6adf7fa7 Simplify create_tests.js (unused prefix call)
c77cbdd8 Add comments about the implementation of pop [addr]
4640b4fe Simplify prefix call
a81a5497 Don't use var
3ca5d13d Separate call name and arguments in code generator
3191a543 Simplify other prefix call (8D/lea)
5185080e Update generated code (stylistic changes and #ud generation)
93b51d41 Remove unused wasm externals
e4af0a7f Avoid hardcoding special cases in code generator (lea, pop r/m)
654a2b49 Avoid hardcoding special cases in code generator (enter/jumpf/callf)
fd1a1e86 Commit generated code (only stylistic changes)
7310fd1a Simplify code generator by merging code for with and without 0f prefix
e7eae4af Simplify code generator by merging code for immediate operands
00fafd8a Improve assertions
db084e49 Simplify code generator (modrm if-else)
0a0e4c9e Improve code generation of switch-case
ce292795 Clarify some comments
37cf33fa Generate code in if/else blocks
cbcc33fc Document naming scheme
e30b97eb Generate modrm decoding for 0f12 (sse) instruction
24b72c2f movlpd cannot be used for register-to-register moves
72d72995 Generate modrm decoding for 0f13 (sse) instruction and disable register-to-register moving
75d76fbb Generate modrm decoding for 0f14 (sse) instruction
ac8965a7 Generate modrm decoding for 0f28-0f2b (movap, movntp)
e919d33e Generate modrm decoding for cvttsd2si
5f2ca2b4 Generate modrm decoding for andp and xorp
c8d1c6de Generate modrm decoding for 0f60-0f70 (sse instructions)
ae4ed46d Add multi-byte nop and prefetch to nasm test, generate modrm decoding
718a1acf Print qemu test error message more useful
d1ecc37e Generate modrm decoding for 0f70-0f80 (sse instructions)
6a7219a5 Generate modrm decoding for popcnt
25278217 Generate modrm decoding for 0f71-0f73 (sse shift with immediate byte)
ed1ec81b Generate modrm decoding for the remaining sse instructions (0fc0-0fff)
42bc5a6f Use 64-bit multiplication for native code
dda3fb39 Remove old modrm-decoding functions
717975ef Move register access functions to cpu.c
aee8138f Remove read_op, read_sib, read_op0F, read_disp
f31317f2 Rename xmm/mmx register access functions
a525e70b Remove 32-bit access to reg_xmm and reg_mmx
c803eabc Rename s8/s16/s32 to i8/i16/i32
9fbd2ddf Don't use uninitialised structs
942eb4f7 Use 64-bit load for mmx registers and assert reg64 and reg128 size
f94ec612 Use 64-bit writes for write_xmm64
08022de9 Use more efficient method for some 128-bit stores
9d5b084c Make timestamp counter unsigned
2ef388b3 Pass 64-bit value to safe_write64
4cb2b1be Optimise safe_write64 and safe_write128
b0ab09fb Implement psllq (660ff3)
9935e5d4 Optimise safe_read64s and safe_read128s
af9ea1cc Log cl in cpuid only if relevant
be5fe23e Add multi-op cache (disabled by default through ENABLE_JIT macro) and JIT paging test (similar to QEMU test).
aa2f286e Don't initialise group_dirtiness with 1 as it increases the binary size significantly
b8e14ed9 Remove unused reg_xmm32s
bc726e03 Implement dbg_log for native code with format characters 'd' and 'x'
454039d6 Fix store task register
63a4b349 Remove unnecessary parens and clean up some log statements
4cc96814 Add logop and dbg_trace imports
7940655d Only inhibit interrupts if the interrupt flag was 0 in STI
876c68a7 Split create_tests into create_tests and generate_interpreter
aa82499f Move detection of string instructions to x86_table
f3840ec2 Move C ast to separate file
90400703 Skip tests for lfence/mfence/sfence, clarify their encoding
4a9d8204 elf: Hide log messages when log level is zero
a601c526 Allow setting log level via settings
8a624453 Add cpu_exception_hook to debug builds
f9e335bf Nasm: Test exceptions
599ad088 logop: Format instruction pointer as unsigned
f95cf22b Don't skip zero dividing tests
2a655a0e Remove get_seg_prefix_ds from read_moffs (preparation for calling read_moffs from the code generator)
bc580b71 Remove obsolete comment
e556cee0 Fix nasmtest dependencies in makefile and clean
dcb1e72b Use all cores on travis
86efa737 Replace all instances of u32 & 0xFFFF with the respective u16 accesses
98b9f439 Use u8 instead of bit-shifts and masks from u32
b43f6569 Replace all instances of u32 >> 16 with the respective u16 accesses
9bfa72c7 Remove unnecessary parens
9cf93734 Clean up remaining instance of u32 with a mask instead of u16
22d4117f Correct order of writes in virt_boundary_write32
6734c7c1 Fix keyboard on ios, fixes #105
858a4506 Add missing file, c_ast.js
1d62e39e Move instruction naming scheme into function
f4816852 Reorder some code
69d49788 Minor improvements
0493e05f Add util.js
af9000c1 Improve full test
e5feba31 Add missing export
c7c42065 Replace prefix_call with custom_resolve_modrm
3186e6ad Add support for "%%" format string to dbg_log_wasm for printf import
efe54fad Add barebones instrumentation profiler (disabled by default).
c9f0d462 Implement movlps m64, xmm and enable its test
42869a12 Add tests for cross-page reads/writes confirmed with byte reads/writes
d68976ea Mask word values in port byte reads
9758d51e Add PS2_LOG_VERBOSE
5f52f037 Update NASM Makefile to include all dependencies to prevent unnecessary recompilation
2c71f927 Have NASM test generator use a seedable PRNG to allow for faster incremental tests
e4aa45bb Add chunk{16,32}_rw paging tests; instructions that read and write to memory
bdf538a2 add codegen to cpu constructor
aa76ce8e add resolve_modrm16
14d7ecf1 refactor codegen
b710319f [rebased] Merge branch codegen
0565ea42 minor refactoring
071dff3f temporary fix for automatic cast warnings
57c504f2 fix modrm16 issue
c2db5d9e jit modrm32
85c04245 reinstate modrm_fn0 and modrm_fn1
be65dafd add ip and previous ip manipulating functions
ae00ef89 update codegen js interface
530a74fa squashed commit for refactor
2c692199 add codegen-test to build
c15afe68 prefix gen to codegen api
c9611533 codegen tests fixes
Fabian 4 years ago
parent
commit
975a06269e
63 changed files with 5043 additions and 2349 deletions
  1. 1 0
      .gitignore
  2. 2 1
      .travis-run-nasm.sh
  3. 33 11
      Makefile
  4. 4 2
      debug.html
  5. 97 0
      gen/c_ast.js
  6. 439 0
      gen/generate_interpreter.js
  7. 14 0
      gen/util.js
  8. 648 0
      gen/x86_table.js
  9. 3 2
      index.html
  10. 1 1
      loader.js
  11. 44 0
      src/browser/lib.js
  12. 13 2
      src/browser/main.js
  13. 1 1
      src/browser/screen.js
  14. 35 21
      src/browser/starter.js
  15. 91 0
      src/codegen.js
  16. 5 0
      src/const.js
  17. 150 18
      src/cpu.js
  18. 2 0
      src/debug.js
  19. 3 3
      src/elf.js
  20. 1 1
      src/externs.js
  21. 2 2
      src/ide.js
  22. 34 9
      src/log.js
  23. 3 2
      src/main.js
  24. 18 9
      src/memory.js
  25. 5 1
      src/native/all.c
  26. 33 60
      src/native/arith.c
  27. 332 0
      src/native/codegen/api.c
  28. 331 0
      src/native/codegen/codegen.h
  29. 132 0
      src/native/codegen/cstring.h
  30. 135 0
      src/native/codegen/util.h
  31. 212 0
      src/native/codegen/wasm_opcodes.h
  32. 14 0
      src/native/const.h
  33. 295 328
      src/native/cpu.c
  34. 2 2
      src/native/fpu.c
  35. 15 24
      src/native/global_pointers.h
  36. 305 332
      src/native/instructions.c
  37. 359 405
      src/native/instructions_0f.c
  38. 4 1
      src/native/log.c
  39. 101 10
      src/native/memory.c
  40. 64 26
      src/native/misc_instr.c
  41. 7 0
      src/native/modrm.c
  42. 67 0
      src/native/profiler/profiler.c
  43. 29 0
      src/native/profiler/profiler.h
  44. 49 0
      src/native/shared.h
  45. 49 48
      src/native/string.c
  46. 12 2
      src/ps2.js
  47. 144 0
      tests/codegen.js
  48. 13 3
      tests/full/run.js
  49. 11 0
      tests/jit-paging/Makefile
  50. 11 0
      tests/jit-paging/README.md
  51. 75 0
      tests/jit-paging/run.js
  52. 84 0
      tests/jit-paging/test-jit.c
  53. 3 1
      tests/nasm/Makefile
  54. 89 946
      tests/nasm/create_tests.js
  55. 18 16
      tests/nasm/gdbauto
  56. 1 1
      tests/nasm/header.inc
  57. 21 0
      tests/nasm/prand.js
  58. 66 11
      tests/nasm/run.js
  59. 1 1
      tests/qemu/Makefile
  60. 67 11
      tests/qemu/compiler.h
  61. 5 1
      tests/qemu/run.js
  62. 232 32
      tests/qemu/test-i386.c
  63. 6 2
      v86.css

+ 1 - 0
.gitignore

@@ -3,6 +3,7 @@
 deploy.sh
 screenshots/
 tests/qemu/test-i386
+tests/jit-paging/test-jit
 *.map
 build/
 closure-compiler/

+ 2 - 1
.travis-run-nasm.sh

@@ -1,3 +1,4 @@
 #!/usr/bin/env bash
 set -e
-make nasmtests
+./tests/nasm/create_tests.js
+make -j $(nproc --all) nasmtests

+ 33 - 11
Makefile

@@ -76,7 +76,7 @@ CORE_FILES=const.js config.js io.js main.js lib.js fpu.js ide.js pci.js floppy.j
 	   dma.js pit.js vga.js ps2.js pic.js rtc.js uart.js hpet.js acpi.js apic.js ioapic.js \
 	   state.js ne2k.js virtio.js bus.js log.js \
 	   cpu.js translate.js modrm.js string.js arith.js misc_instr.js instructions.js debug.js \
-	   elf.js
+	   elf.js codegen.js
 LIB_FILES=9p.js filesystem.js jor1k.js marshall.js utf8.js
 BROWSER_FILES=screen.js \
 	      keyboard.js mouse.js serial.js \
@@ -135,14 +135,15 @@ build/libv86-debug.js: $(CLOSURE) src/*.js lib/*.js src/browser/*.js
 		--js $(BROWSER_FILES)\
 		--js $(LIB_FILES)
 
-build/v86.wasm: src/native/*.c src/native/*.h
+build/v86.wasm: src/native/*.c src/native/*.h src/native/codegen/*.c src/native/codegen/*.h
 	mkdir -p build
 	-ls -lh build/v86.wasm
 	# --llvm-opts 3
 	# -Wno-extra-semi
 	# EMCC_DEBUG=1  EMCC_WASM_BACKEND=1
 	# -fno-inline
-	emcc src/native/all.c \
+	emcc src/native/all.c src/native/codegen/api.c \
+	    -Isrc/native/ -Isrc/native/profiler/ \
 	    -Wall -Wpedantic -Wextra \
 	    -DDEBUG=false \
 	    -DNDEBUG \
@@ -150,6 +151,7 @@ build/v86.wasm: src/native/*.c src/native/*.h
 	    -fcolor-diagnostics \
 	    -fwrapv \
 	    --llvm-opts 3 \
+	    --llvm-lto 3 \
 	    -O3 \
 	    -g4 \
 	    -s LEGALIZE_JS_FFI=0 \
@@ -157,8 +159,9 @@ build/v86.wasm: src/native/*.c src/native/*.h
 	    -s WASM=1 -s SIDE_MODULE=1 -o build/v86.wasm
 	ls -lh build/v86.wasm
 
-build/v86-debug.wasm: src/native/*.c src/native/*.h
-	emcc src/native/all.c \
+build/v86-debug.wasm: src/native/*.c src/native/*.h src/native/codegen/*.c src/native/codegen/*.h
+	emcc src/native/all.c src/native/codegen/api.c \
+	    -Isrc/native/ -Isrc/native/profiler/ \
 	    -Wall -Wpedantic -Wextra \
 	    -Wno-bitwise-op-parentheses -Wno-gnu-binary-literal \
 	    -fcolor-diagnostics \
@@ -170,12 +173,27 @@ build/v86-debug.wasm: src/native/*.c src/native/*.h
 	    -s WASM=1 -s SIDE_MODULE=1 -o build/v86-debug.wasm
 	ls -lh build/v86-debug.wasm
 
+build/codegen-test.wasm: src/native/*.c src/native/*.h src/native/codegen/*.c src/native/codegen/*.h
+	emcc src/native/codegen/api.c \
+	    -Wall -Wpedantic -Wextra \
+	    -Wno-bitwise-op-parentheses -Wno-gnu-binary-literal \
+	    -fcolor-diagnostics \
+	    -fwrapv \
+	    -Os \
+	    -g4 \
+	    -s LEGALIZE_JS_FFI=0 \
+	    -s "BINARYEN_TRAP_MODE='allow'" \
+	    -s WASM=1 -s SIDE_MODULE=1 -o build/codegen-test.wasm
+	ls -lh build/codegen-test.wasm
+
 clean:
 	-rm build/libv86.js
+	-rm build/libv86-debug.js
 	-rm build/v86_all.js
-	-rm build/libv86.js.map
-	-rm build/v86_all.js.map
 	-rm build/v86.wasm
+	-rm build/v86-debug.wasm
+	-rm build/*.map
+	-rm build/*.wast
 	$(MAKE) -C $(NASM_TEST_DIR) clean
 
 run:
@@ -202,15 +220,19 @@ $(CLOSURE):
 tests: build/libv86.js build/v86.wasm
 	./tests/full/run.js
 
-nasmtests: build/libv86.js build/v86.wasm
+nasmtests: build/libv86-debug.js build/v86-debug.wasm
 	$(MAKE) -C $(NASM_TEST_DIR) all
 	$(NASM_TEST_DIR)/run.js
 
+jitpagingtests: build/libv86.js build/v86.wasm
+	$(MAKE) -C tests/jit-paging test-jit
+	./tests/jit-paging/run.js
+
 qemutests: build/libv86.js build/v86.wasm
 	$(MAKE) -C tests/qemu test-i386
-	./tests/qemu/run.js > result
-	./tests/qemu/test-i386 > reference
-	diff result reference
+	./tests/qemu/run.js > /tmp/v86-test-result
+	./tests/qemu/test-i386 > /tmp/v86-test-reference
+	diff /tmp/v86-test-result /tmp/v86-test-reference
 
 kvm-unit-test: build/libv86.js build/v86.wasm
 	(cd tests/kvm-unit-tests && ./configure)

+ 4 - 2
debug.html

@@ -185,9 +185,11 @@
 <div id="screen_container" style="display: none">
     <div id="screen"></div>
     <canvas id="vga"></canvas>
-</div>
 
-<input type="text" class="phone_keyboard" style="display: none">
+    <div style="position: absolute; top: 0; z-index: 10">
+        <textarea class="phone_keyboard"></textarea>
+    </div>
+</div>
 
 <div id="runtime_infos" style="display: none">
     Running: <span id="running_time">0s</span> <br>

+ 97 - 0
gen/c_ast.js

@@ -0,0 +1,97 @@
+"use strict";
+
+function repeat(s, n)
+{
+    let out = "";
+    for(let i = 0; i < n; i++) out += s;
+    return out;
+}
+
+function indent(lines, how_much)
+{
+    return lines.map(line => repeat(" ", how_much) + line);
+}
+
+function print_syntax_tree(statements)
+{
+    let code = [];
+
+    for(let statement of statements)
+    {
+        if(typeof statement === "string")
+        {
+            code.push(statement);
+        }
+        else if(statement.type === "switch")
+        {
+            console.assert(statement.condition);
+
+            const cases = [];
+
+            for(let case_ of statement.cases)
+            {
+                console.assert(case_.conditions.length >= 1);
+
+                for(let condition of case_.conditions)
+                {
+                    cases.push(`case ${condition}:`);
+                }
+
+                cases.push(`{`);
+                cases.push.apply(cases, indent(print_syntax_tree(case_.body), 4));
+                cases.push(`}`);
+                cases.push(`break;`);
+            }
+
+            if(statement.default_case)
+            {
+                cases.push(`default:`);
+                cases.push.apply(cases, indent(print_syntax_tree(statement.default_case.body), 4));
+            }
+
+            code.push(`switch(${statement.condition})`);
+            code.push(`{`);
+            code.push.apply(code, indent(cases, 4));
+            code.push(`}`);
+        }
+        else if(statement.type === "if-else")
+        {
+            console.assert(statement.if_blocks.length >= 1);
+
+            let first_if_block = statement.if_blocks[0];
+
+            code.push(`if(${first_if_block.condition})`);
+            code.push(`{`);
+            code.push.apply(code, indent(print_syntax_tree(first_if_block.body), 4));
+            code.push(`}`);
+
+            for(let i = 1; i < statement.if_blocks.length; i++)
+            {
+                let if_block = statement.if_blocks[i];
+
+                code.push(`else if(${if_block.condition})`);
+                code.push(`{`);
+                code.push.apply(code, indent(print_syntax_tree(if_block.body), 4));
+                code.push(`}`);
+            }
+
+            if(statement.else_block)
+            {
+                code.push(`else`);
+                code.push(`{`);
+                code.push.apply(code, indent(print_syntax_tree(statement.else_block.body), 4));
+                code.push(`}`);
+            }
+        }
+        else
+        {
+            console.assert(false, "Unexpected type: " + statement.type);
+        }
+    }
+
+    return code;
+}
+
+module.exports = {
+    print_syntax_tree,
+};

+ 439 - 0
gen/generate_interpreter.js

@@ -0,0 +1,439 @@
+#!/usr/bin/env node
+"use strict";
+
+const fs = require("fs");
+const encodings = require("./x86_table");
+const c_ast = require("./c_ast");
+const { hex } = require("./util");
+
+gen_table();
+
+
+function gen_read_imm_call(op, size_variant)
+{
+    let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
+
+    if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
+    {
+        if(op.imm8)
+        {
+            return "read_imm8()";
+        }
+        else if(op.imm8s)
+        {
+            return "read_imm8s()";
+        }
+        else
+        {
+            if(op.immaddr)
+            {
+                // immaddr: depends on address size
+                return "read_moffs()";
+            }
+            else
+            {
+                console.assert(op.imm1632 || op.imm16 || op.imm32);
+
+                if(op.imm1632 && size === 16 || op.imm16)
+                {
+                    return "read_imm16()";
+                }
+                else
+                {
+                    console.assert(op.imm1632 && size === 32 || op.imm32);
+                    return "read_imm32s()";
+                }
+            }
+        }
+    }
+    else
+    {
+        return undefined;
+    }
+}
+
+function gen_call(name, args)
+{
+    args = args || [];
+    return `${name}(${args.join(", ")});`;
+}
+
+function gen_modrm_mem_reg_split(name, mem_args, reg_args)
+{
+    return {
+        type: "if-else",
+        if_blocks: [{
+            condition: "modrm_byte < 0xC0",
+            body: [gen_call(`${name}_mem`, mem_args)],
+        }],
+        else_block: {
+            body: [gen_call(`${name}_reg`, reg_args)],
+        },
+    };
+}
+
+/*
+ * Current naming scheme:
+ * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
+ */
+
+function make_instruction_name(encoding, size, prefix_variant)
+{
+    const suffix = encoding.os ? String(size) : "";
+    const opcode_hex = hex(encoding.opcode & 0xFF, 2);
+    const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
+    const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
+    const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
+
+    return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
+}
+
+function gen_instruction_body(encodings, size)
+{
+    const encoding = encodings[0];
+
+    let has_66 = false;
+    let has_F2 = false;
+    let has_F3 = false;
+
+    for(let e of encodings)
+    {
+        if((e.opcode >>> 16) === 0x66) has_66 = true;
+        if((e.opcode >>> 16) === 0xF2) has_F2 = true;
+        if((e.opcode >>> 16) === 0xF3) has_F3 = true;
+    }
+
+    if(has_66 || has_F2 || has_F3)
+    {
+        console.assert((encoding.opcode & 0xFF00) === 0x0F00);
+    }
+
+    if(encoding.fixed_g !== undefined)
+    {
+        // instruction with modrm byte where the middle 3 bits encode the instruction
+
+        // group by opcode without prefix plus middle bits of modrm byte
+        let cases = encodings.reduce((cases_by_opcode, case_) => {
+            console.assert(typeof case_.fixed_g === "number");
+            cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
+            return cases_by_opcode;
+        }, Object.create(null));
+        cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
+
+        return [
+            "int32_t modrm_byte = read_imm8();",
+            {
+                type: "switch",
+                condition: "modrm_byte >> 3 & 7",
+                cases: cases.map(case_ => {
+                    const fixed_g = case_.fixed_g;
+                    const instruction_name = make_instruction_name(case_, size, undefined);
+
+                    const modrm_resolve = case_.custom_modrm_resolve ? instruction_name + "_modrm_resolve" : "modrm_resolve";
+                    const mem_args = [`${modrm_resolve}(modrm_byte)`];
+                    const reg_args = ["modrm_byte & 7"];
+
+                    const imm_read = gen_read_imm_call(case_, size);
+                    if(imm_read)
+                    {
+                        mem_args.push(imm_read);
+                        reg_args.push(imm_read);
+                    }
+
+                    if(has_66 || has_F2 || has_F3)
+                    {
+                        const if_blocks = [];
+
+                        if(has_66) {
+                            const name = make_instruction_name(case_, size, 0x66);
+                            const body = [gen_modrm_mem_reg_split(name, mem_args, reg_args)];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
+                        }
+                        if(has_F2) {
+                            const name = make_instruction_name(case_, size, 0xF2);
+                            const body = [gen_modrm_mem_reg_split(name, mem_args, reg_args)];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
+                        }
+                        if(has_F3) {
+                            const name = make_instruction_name(case_, size, 0xF3);
+                            const body = [gen_modrm_mem_reg_split(name, mem_args, reg_args)];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
+                        }
+
+                        const else_block = {
+                            body: [gen_modrm_mem_reg_split(instruction_name, mem_args, reg_args)],
+                        };
+
+                        return {
+                            conditions: [fixed_g],
+                            body: [
+                                "int32_t prefixes_ = *prefixes;",
+                                {
+                                    type: "if-else",
+                                    if_blocks,
+                                    else_block,
+                                },
+                            ],
+                        };
+                    }
+                    else
+                    {
+                        const body = [gen_modrm_mem_reg_split(instruction_name, mem_args, reg_args)];
+
+                        return {
+                            conditions: [fixed_g],
+                            body,
+                        };
+                    }
+                }),
+
+                default_case: {
+                    body: [
+                        "assert(false);",
+                        "trigger_ud();",
+                    ],
+                }
+            }
+        ];
+    }
+    else if(has_66 || has_F2 || has_F3)
+    {
+        // instruction withoud modrm byte but with prefix
+
+        console.assert(encoding.e);
+        console.assert(!encoding.ignore_mod);
+        console.assert(!encoding.requires_prefix_call, "Unexpected instruction (66/f2/f3 with prefix call)");
+
+        const imm_read = gen_read_imm_call(encoding, size);
+        const modrm_resolve = encoding.custom_modrm_resolve ? instruction_name + "_modrm_resolve" : "modrm_resolve";
+        const mem_args = [`${modrm_resolve}(modrm_byte)`, "modrm_byte >> 3 & 7"];
+        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
+
+        if(imm_read)
+        {
+            mem_args.push(imm_read);
+            reg_args.push(imm_read);
+        }
+
+        const if_blocks = [];
+
+        if(has_66) {
+            const name = make_instruction_name(encoding, size, 0x66);
+            const body = [gen_modrm_mem_reg_split(name, mem_args, reg_args)];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
+        }
+        if(has_F2) {
+            const name = make_instruction_name(encoding, size, 0xF2);
+            const body = [gen_modrm_mem_reg_split(name, mem_args, reg_args)];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
+        }
+        if(has_F3) {
+            const name = make_instruction_name(encoding, size, 0xF3);
+            const body = [gen_modrm_mem_reg_split(name, mem_args, reg_args)];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
+        }
+
+        const else_block = {
+            body: [gen_modrm_mem_reg_split(make_instruction_name(encoding, size), mem_args, reg_args)],
+        };
+
+        return [
+            "int32_t modrm_byte = read_imm8();",
+            "int32_t prefixes_ = *prefixes;",
+            {
+                type: "if-else",
+                if_blocks,
+                else_block,
+            }
+        ];
+    }
+    else if(encoding.fixed_g === undefined && encoding.e)
+    {
+        // instruction with modrm byte where the middle 3 bits encode a register
+
+        console.assert(encodings.length === 1);
+
+        const instruction_name = make_instruction_name(encoding, size);
+        const imm_read = gen_read_imm_call(encoding, size);
+
+        if(encoding.ignore_mod)
+        {
+            console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
+
+            // Has modrm byte, but the 2 mod bits are ignored and both
+            // operands are always registers (0f20-0f24)
+
+            return [
+                "int32_t modrm_byte = read_imm8();",
+                gen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
+            ];
+        }
+        else
+        {
+            const modrm_resolve = encoding.custom_modrm_resolve ? instruction_name + "_modrm_resolve" : "modrm_resolve";
+            const mem_args = [`${modrm_resolve}(modrm_byte)`, "modrm_byte >> 3 & 7"];
+            const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
+
+            if(imm_read)
+            {
+                mem_args.push(imm_read);
+                reg_args.push(imm_read);
+            }
+
+            return [
+                "int32_t modrm_byte = read_imm8();",
+                gen_modrm_mem_reg_split(instruction_name, mem_args, reg_args),
+            ];
+        }
+    }
+    else
+    {
+        // instruction without modrm byte or prefix
+
+        const imm_read = gen_read_imm_call(encoding, size);
+        const instruction_name = make_instruction_name(encoding, size);
+
+        const args = [];
+
+        if(imm_read)
+        {
+            args.push(imm_read);
+        }
+
+        if(encoding.extra_imm16)
+        {
+            console.assert(imm_read);
+            args.push("read_imm16()");
+        }
+        else if(encoding.extra_imm8)
+        {
+            console.assert(imm_read);
+            args.push("read_imm8()");
+        }
+
+        return [gen_call(instruction_name, args)];
+    }
+}
+
+function gen_table()
+{
+    let by_opcode = Object.create(null);
+    let by_opcode0f = Object.create(null);
+
+    for(let o of encodings)
+    {
+        let opcode = o.opcode;
+
+        if(opcode >= 0x100)
+        {
+            if((opcode & 0xFF00) === 0x0F00)
+            {
+                opcode &= 0xFF;
+                by_opcode0f[opcode] = by_opcode0f[opcode] || [];
+                by_opcode0f[opcode].push(o);
+            }
+        }
+        else
+        {
+            by_opcode[opcode] = by_opcode[opcode] || [];
+            by_opcode[opcode].push(o);
+        }
+    }
+
+    let cases = [];
+    for(let opcode = 0; opcode < 0x100; opcode++)
+    {
+        let encoding = by_opcode[opcode];
+        console.assert(encoding && encoding.length);
+
+        let opcode_hex = hex(opcode, 2);
+
+        if(encoding[0].os)
+        {
+            cases.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 16),
+            });
+            cases.push({
+                conditions: [`0x${opcode_hex}|0x100`],
+                body: gen_instruction_body(encoding, 32),
+            });
+        }
+        else
+        {
+            cases.push({
+                conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
+                body: gen_instruction_body(encoding, undefined),
+            });
+        }
+    }
+    const table = {
+        type: "switch",
+        condition: "opcode",
+        cases,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    fs.writeFileSync("/tmp/table", c_ast.print_syntax_tree([table]).join("\n") + "\n");
+
+    const cases0f_16 = [];
+    const cases0f_32 = [];
+    for(let opcode = 0; opcode < 0x100; opcode++)
+    {
+        let encoding = by_opcode0f[opcode];
+
+        if(!encoding)
+        {
+            encoding = [
+                {
+                    opcode: 0x0F00 | opcode,
+                },
+            ];
+        }
+
+        console.assert(encoding && encoding.length);
+
+        let opcode_hex = hex(opcode, 2);
+
+        if(encoding[0].os)
+        {
+            cases0f_16.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 16),
+            });
+            cases0f_32.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 32),
+            });
+        }
+        else
+        {
+            let block = {
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, undefined),
+            };
+            cases0f_16.push(block);
+            cases0f_32.push(block);
+        }
+    }
+
+    const table0f_16 = {
+        type: "switch",
+        condition: "opcode",
+        cases: cases0f_16,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    const table0f_32 = {
+        type: "switch",
+        condition: "opcode",
+        cases: cases0f_32,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    fs.writeFileSync("/tmp/table0f_16", c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n");
+    fs.writeFileSync("/tmp/table0f_32", c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n");
+}

+ 14 - 0
gen/util.js

@@ -0,0 +1,14 @@
+"use strict";
+
+// Format `n` as upper-case hexadecimal (no "0x" prefix), left-padded with "0" to at least `pad` characters.
+function hex(n, pad)
+{
+    pad = pad || 0; // a missing (or otherwise falsy) pad means "no minimum width"
+    let s = n.toString(16).toUpperCase();
+    while(s.length < pad) s = "0" + s; // prepend zeros until the minimum width is reached
+    return s;
+}
+
+module.exports = {
+    hex,
+};

+ 648 - 0
gen/x86_table.js

@@ -0,0 +1,648 @@
+"use strict";
+
+// http://ref.x86asm.net/coder32.html
+// Single-bit masks for x86 status flags (bit positions as in EFLAGS); OR-ed together in `mask_flags` below
+const zf = 1 << 6; // zero flag
+const of = 1 << 11; // overflow flag
+const cf = 1 << 0; // carry flag
+const af = 1 << 4; // adjust (auxiliary carry) flag
+const pf = 1 << 2; // parity flag
+const sf = 1 << 7; // sign flag
+
+// TODO:
+// - describe which registers are written and read
+
+// os: the instruction behaves differently depending on the operand size
+const encodings = [
+    { opcode: 0x06, os: 1, skip: 1, },
+    { opcode: 0x07, os: 1, skip: 1, },
+    { opcode: 0x0E, os: 1, skip: 1, },
+    { opcode: 0x0F, os: 1, prefix: 1, },
+    { opcode: 0x16, os: 1, skip: 1, },
+    { opcode: 0x17, os: 1, skip: 1, },
+    { opcode: 0x1E, os: 1, skip: 1, },
+    { opcode: 0x1F, os: 1, skip: 1, },
+    { opcode: 0x26, prefix: 1, },
+    { opcode: 0x27, mask_flags: of, },
+    { opcode: 0x2E, prefix: 1, },
+    { opcode: 0x2F, mask_flags: of, },
+    { opcode: 0x36, prefix: 1, },
+    { opcode: 0x37, mask_flags: of | sf | pf | zf, },
+    { opcode: 0x3E, prefix: 1, },
+    { opcode: 0x3F, mask_flags: of | sf | pf | zf, },
+
+    { opcode: 0x40, os: 1, },
+    { opcode: 0x41, os: 1, },
+    { opcode: 0x42, os: 1, },
+    { opcode: 0x43, os: 1, },
+    { opcode: 0x44, os: 1, },
+    { opcode: 0x45, os: 1, },
+    { opcode: 0x46, os: 1, },
+    { opcode: 0x47, os: 1, },
+
+    { opcode: 0x48, os: 1, },
+    { opcode: 0x49, os: 1, },
+    { opcode: 0x4A, os: 1, },
+    { opcode: 0x4B, os: 1, },
+    { opcode: 0x4C, os: 1, },
+    { opcode: 0x4D, os: 1, },
+    { opcode: 0x4E, os: 1, },
+    { opcode: 0x4F, os: 1, },
+
+    { opcode: 0x50, os: 1, },
+    { opcode: 0x51, os: 1, },
+    { opcode: 0x52, os: 1, },
+    { opcode: 0x53, os: 1, },
+    { opcode: 0x54, os: 1, },
+    { opcode: 0x55, os: 1, },
+    { opcode: 0x56, os: 1, },
+    { opcode: 0x57, os: 1, },
+
+    { opcode: 0x58, os: 1, },
+    { opcode: 0x59, os: 1, },
+    { opcode: 0x5A, os: 1, },
+    { opcode: 0x5B, os: 1, },
+    { opcode: 0x5C, os: 1, },
+    { opcode: 0x5D, os: 1, },
+    { opcode: 0x5E, os: 1, },
+    { opcode: 0x5F, os: 1, },
+
+    { opcode: 0x60, os: 1, },
+    { opcode: 0x61, os: 1, },
+    { opcode: 0x62, e: 1, skip: 1, },
+    { opcode: 0x63, e: 1, },
+    { opcode: 0x64, prefix: 1, },
+    { opcode: 0x65, prefix: 1, },
+    { opcode: 0x66, prefix: 1, },
+    { opcode: 0x67, prefix: 1, },
+
+    { opcode: 0x68, os: 1, imm1632: 1, },
+    { opcode: 0x69, os: 1, e: 1, imm1632: 1, mask_flags: af, }, // zf?
+    { opcode: 0x6A, os: 1, imm8s: 1, },
+    { opcode: 0x6B, os: 1, e: 1, imm8s: 1, mask_flags: af, }, // zf?
+
+    { opcode: 0x6C, is_string: 1, skip: 1, },
+    { opcode: 0x6D, is_string: 1, os: 1, skip: 1, },
+    { opcode: 0x6E, is_string: 1, skip: 1, },
+    { opcode: 0x6F, is_string: 1, os: 1, skip: 1, },
+
+    { opcode: 0x84, e: 1, },
+    { opcode: 0x85, os: 1, e: 1, },
+    { opcode: 0x86, e: 1, },
+    { opcode: 0x87, os: 1, e: 1, },
+    { opcode: 0x88, e: 1, },
+    { opcode: 0x89, os: 1, e: 1, },
+    { opcode: 0x8A, e: 1, },
+    { opcode: 0x8B, os: 1, e: 1, },
+
+    { opcode: 0x8C, os: 1, e: 1, skip: 1, },
+    { opcode: 0x8D, os: 1, e: 1, only_mem: 1, custom_modrm_resolve: 1, }, // lea
+    { opcode: 0x8E, e: 1, skip: 1, },
+    { opcode: 0x8F, os: 1, e: 1, fixed_g: 0, custom_modrm_resolve: 1, }, // pop r/m
+
+    { opcode: 0x90, },
+    { opcode: 0x91, os: 1, },
+    { opcode: 0x92, os: 1, },
+    { opcode: 0x93, os: 1, },
+    { opcode: 0x94, os: 1, },
+    { opcode: 0x95, os: 1, },
+    { opcode: 0x96, os: 1, },
+    { opcode: 0x97, os: 1, },
+
+    { opcode: 0x98, os: 1, },
+    { opcode: 0x99, os: 1, },
+    { opcode: 0x9A, os: 1, imm1632: 1, extra_imm16: 1, skip: 1, }, // callf
+    { opcode: 0x9B, skip: 1, },
+    { opcode: 0x9C, os: 1, },
+    { opcode: 0x9D, os: 1, skip: 1, }, // popf
+    { opcode: 0x9E, },
+    { opcode: 0x9F, },
+
+    { opcode: 0xA0, immaddr: 1, },
+    { opcode: 0xA1, os: 1, immaddr: 1, },
+    { opcode: 0xA2, immaddr: 1, },
+    { opcode: 0xA3, os: 1, immaddr: 1, },
+
+    { opcode: 0xA4, is_string: 1, },
+    { opcode: 0xA5, is_string: 1, os: 1, },
+    { opcode: 0xA6, is_string: 1, },
+    { opcode: 0xA7, is_string: 1, os: 1, },
+
+    { opcode: 0xA8, imm8: 1, },
+    { opcode: 0xA9, os: 1, imm1632: 1, },
+
+    { opcode: 0xAA, is_string: 1, },
+    { opcode: 0xAB, is_string: 1, os: 1, },
+    { opcode: 0xAC, is_string: 1, },
+    { opcode: 0xAD, is_string: 1, os: 1, },
+    { opcode: 0xAE, is_string: 1, },
+    { opcode: 0xAF, is_string: 1, os: 1, },
+
+    { opcode: 0xC2, os: 1, imm16: 1, skip: 1, },
+    { opcode: 0xC3, os: 1, skip: 1, },
+
+    { opcode: 0xC4, os: 1, e: 1, skip: 1, },
+    { opcode: 0xC5, os: 1, e: 1, skip: 1, },
+
+    { opcode: 0xC6, e: 1, fixed_g: 0, imm8: 1, },
+    { opcode: 0xC7, os: 1, e: 1, fixed_g: 0, imm1632: 1, },
+
+    { opcode: 0xC8, os: 1, imm16: 1, extra_imm8: 1, }, // enter
+    { opcode: 0xC9, os: 1, skip: 1, }, // leave: requires valid ebp
+    { opcode: 0xCA, os: 1, imm16: 1, skip: 1, },
+    { opcode: 0xCB, os: 1, skip: 1, },
+    { opcode: 0xCC, skip: 1, },
+    { opcode: 0xCD, imm8: 1, skip: 1, },
+    { opcode: 0xCE, skip: 1, },
+    { opcode: 0xCF, os: 1, skip: 1, },
+
+    { opcode: 0xD4, imm8: 1, }, // aam, may trigger #de
+    { opcode: 0xD5, imm8: 1, mask_flags: of | cf | af, },
+    { opcode: 0xD6, },
+    { opcode: 0xD7, skip: 1, },
+
+    { opcode: 0xD8, e: 1, skip: 1, },
+    { opcode: 0xD9, e: 1, skip: 1, },
+    { opcode: 0xDA, e: 1, skip: 1, },
+    { opcode: 0xDB, e: 1, skip: 1, },
+    { opcode: 0xDC, e: 1, skip: 1, },
+    { opcode: 0xDD, e: 1, skip: 1, },
+    { opcode: 0xDE, e: 1, skip: 1, },
+    { opcode: 0xDF, e: 1, skip: 1, },
+
+    { opcode: 0xE0, imm8s: 1, skip: 1, },
+    { opcode: 0xE1, imm8s: 1, skip: 1, },
+    { opcode: 0xE2, imm8s: 1, skip: 1, },
+    { opcode: 0xE3, imm8s: 1, skip: 1, },
+
+    { opcode: 0xE4, imm8: 1, skip: 1, },
+    { opcode: 0xE5, os: 1, imm8: 1, skip: 1, },
+    { opcode: 0xE6, imm8: 1, skip: 1, },
+    { opcode: 0xE7, os: 1, imm8: 1, skip: 1, },
+
+    { opcode: 0xE8, os: 1, imm1632: 1, skip: 1, },
+    { opcode: 0xE9, os: 1, imm1632: 1, skip: 1, },
+    { opcode: 0xEA, os: 1, imm1632: 1, extra_imm16: 1, skip: 1, }, // jmpf
+    { opcode: 0xEB, imm8s: 1, skip: 1, },
+
+    { opcode: 0xEC, skip: 1, },
+    { opcode: 0xED, os: 1, skip: 1, },
+    { opcode: 0xEE, skip: 1, },
+    { opcode: 0xEF, os: 1, skip: 1, },
+
+    { opcode: 0xF0, prefix: 1, },
+    { opcode: 0xF1, skip: 1, },
+    { opcode: 0xF2, prefix: 1, },
+    { opcode: 0xF3, prefix: 1, },
+    { opcode: 0xF4, skip: 1, },
+    { opcode: 0xF5, },
+
+    { opcode: 0xF6, fixed_g: 0, imm8: 1, },
+    { opcode: 0xF6, fixed_g: 1, imm8: 1, },
+    { opcode: 0xF6, fixed_g: 2, },
+    { opcode: 0xF6, fixed_g: 3, },
+    { opcode: 0xF6, fixed_g: 4, mask_flags: af | zf, },
+    { opcode: 0xF6, fixed_g: 5, mask_flags: af | zf, },
+    { opcode: 0xF6, fixed_g: 6, },
+    { opcode: 0xF6, fixed_g: 7, },
+
+    { opcode: 0xF7, os: 1, fixed_g: 0, imm1632: 1, },
+    { opcode: 0xF7, os: 1, fixed_g: 1, imm1632: 1, },
+    { opcode: 0xF7, os: 1, fixed_g: 2, },
+    { opcode: 0xF7, os: 1, fixed_g: 3, },
+    { opcode: 0xF7, os: 1, fixed_g: 4, mask_flags: zf | af, },
+    { opcode: 0xF7, os: 1, fixed_g: 5, mask_flags: zf | af, },
+    { opcode: 0xF7, os: 1, fixed_g: 6, },
+    { opcode: 0xF7, os: 1, fixed_g: 7, },
+
+    { opcode: 0xF8, },
+    { opcode: 0xF9, },
+    { opcode: 0xFA, skip: 1, },
+    { opcode: 0xFB, skip: 1, },
+    { opcode: 0xFC, },
+    { opcode: 0xFD, },
+
+    { opcode: 0xFE, e: 1, fixed_g: 0, },
+    { opcode: 0xFE, e: 1, fixed_g: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 0, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 2, skip: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 3, skip: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 4, skip: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 5, skip: 1, },
+    { opcode: 0xFF, os: 1, e: 1, fixed_g: 6, },
+
+    { opcode: 0x0F00, fixed_g: 0, e: 1, skip: 1 },
+    { opcode: 0x0F00, fixed_g: 1, e: 1, skip: 1 },
+    { opcode: 0x0F00, fixed_g: 2, e: 1, skip: 1 },
+    { opcode: 0x0F00, fixed_g: 3, e: 1, skip: 1 },
+    { opcode: 0x0F00, fixed_g: 4, e: 1, skip: 1 },
+    { opcode: 0x0F00, fixed_g: 5, e: 1, skip: 1 },
+
+    { opcode: 0x0F01, fixed_g: 0, e: 1, skip: 1 },
+    { opcode: 0x0F01, fixed_g: 1, e: 1, skip: 1 },
+    { opcode: 0x0F01, fixed_g: 2, e: 1, skip: 1 },
+    { opcode: 0x0F01, fixed_g: 3, e: 1, skip: 1 },
+    { opcode: 0x0F01, fixed_g: 4, e: 1, skip: 1 },
+    { opcode: 0x0F01, fixed_g: 6, e: 1, skip: 1 },
+    { opcode: 0x0F01, fixed_g: 7, e: 1, skip: 1 },
+
+    { opcode: 0x0F02, os: 1, e: 1, skip: 1 },
+    { opcode: 0x0F03, os: 1, e: 1, skip: 1 },
+    { opcode: 0x0F04, skip: 1 },
+    { opcode: 0x0F05, skip: 1 },
+    { opcode: 0x0F06, skip: 1 },
+    { opcode: 0x0F07, skip: 1 },
+    { opcode: 0x0F08, skip: 1 },
+    { opcode: 0x0F09, skip: 1 },
+    { opcode: 0x0F09, skip: 1 },
+    { opcode: 0x0F0A, skip: 1 },
+    { opcode: 0x0F0B, skip: 1 },
+    { opcode: 0x0F0C, skip: 1 },
+    { opcode: 0x0F0D, skip: 1 },
+    { opcode: 0x0F0E, skip: 1 },
+    { opcode: 0x0F0F, skip: 1 },
+
+    { opcode: 0x0F18, only_mem: 1, e: 1, },
+    { opcode: 0x0F1F, e: 1, },
+
+    { opcode: 0x0F20, ignore_mod: 1, e: 1, skip: 1 },
+    { opcode: 0x0F21, ignore_mod: 1, e: 1, skip: 1 },
+    { opcode: 0x0F22, ignore_mod: 1, e: 1, skip: 1 },
+    { opcode: 0x0F23, ignore_mod: 1, e: 1, skip: 1 },
+
+    { opcode: 0x0F30, skip: 1 },
+    { opcode: 0x0F31, skip: 1 },
+    { opcode: 0x0F32, skip: 1 },
+    { opcode: 0x0F33, skip: 1 },
+    { opcode: 0x0F34, skip: 1 },
+    { opcode: 0x0F35, skip: 1 },
+
+    { opcode: 0x0F40, e: 1, os: 1, },
+    { opcode: 0x0F41, e: 1, os: 1, },
+    { opcode: 0x0F42, e: 1, os: 1, },
+    { opcode: 0x0F43, e: 1, os: 1, },
+    { opcode: 0x0F44, e: 1, os: 1, },
+    { opcode: 0x0F45, e: 1, os: 1, },
+    { opcode: 0x0F46, e: 1, os: 1, },
+    { opcode: 0x0F47, e: 1, os: 1, },
+    { opcode: 0x0F48, e: 1, os: 1, },
+    { opcode: 0x0F49, e: 1, os: 1, },
+    { opcode: 0x0F4A, e: 1, os: 1, },
+    { opcode: 0x0F4B, e: 1, os: 1, },
+    { opcode: 0x0F4C, e: 1, os: 1, },
+    { opcode: 0x0F4D, e: 1, os: 1, },
+    { opcode: 0x0F4E, e: 1, os: 1, },
+    { opcode: 0x0F4F, e: 1, os: 1, },
+
+    { opcode: 0x0F80, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F81, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F82, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F83, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F84, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F85, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F86, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F87, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F88, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F89, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F8A, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F8B, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F8C, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F8D, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F8E, imm1632: 1, os: 1, skip: 1, },
+    { opcode: 0x0F8F, imm1632: 1, os: 1, skip: 1, },
+
+    { opcode: 0x0F90, e: 1, },
+    { opcode: 0x0F91, e: 1, },
+    { opcode: 0x0F92, e: 1, },
+    { opcode: 0x0F93, e: 1, },
+    { opcode: 0x0F94, e: 1, },
+    { opcode: 0x0F95, e: 1, },
+    { opcode: 0x0F96, e: 1, },
+    { opcode: 0x0F97, e: 1, },
+    { opcode: 0x0F98, e: 1, },
+    { opcode: 0x0F99, e: 1, },
+    { opcode: 0x0F9A, e: 1, },
+    { opcode: 0x0F9B, e: 1, },
+    { opcode: 0x0F9C, e: 1, },
+    { opcode: 0x0F9D, e: 1, },
+    { opcode: 0x0F9E, e: 1, },
+    { opcode: 0x0F9F, e: 1, },
+
+    { opcode: 0x0FA0, os: 1, skip: 1, },
+    { opcode: 0x0FA1, os: 1, skip: 1, },
+    { opcode: 0x0FA2, skip: 1, },
+
+    { opcode: 0x0FA8, os: 1, skip: 1, },
+    { opcode: 0x0FA9, os: 1, skip: 1, },
+
+    { opcode: 0x0FA3, os: 1, e: 1, only_reg: 1, }, // bt (can also index memory, but not supported by test right now)
+    { opcode: 0x0FAB, os: 1, e: 1, only_reg: 1, },
+    { opcode: 0x0FB3, os: 1, e: 1, only_reg: 1, },
+    { opcode: 0x0FBB, os: 1, e: 1, only_reg: 1, },
+
+    { opcode: 0x0FBA, os: 1, e: 1, fixed_g: 4, imm8: 1, only_reg: 1, }, // bt
+    { opcode: 0x0FBA, os: 1, e: 1, fixed_g: 5, imm8: 1, only_reg: 1, },
+    { opcode: 0x0FBA, os: 1, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, },
+    { opcode: 0x0FBA, os: 1, e: 1, fixed_g: 7, imm8: 1, only_reg: 1, },
+
+    { opcode: 0x0FBC, os: 1, e: 1, mask_flags: af, }, // bsf
+    { opcode: 0x0FBD, os: 1, e: 1, mask_flags: af, },
+
+    // note: overflow flag only undefined if shift is > 1
+    { opcode: 0x0FA4, os: 1, e: 1, imm8: 1, mask_flags: af | of, }, // shld
+    { opcode: 0x0FA5, os: 1, e: 1, mask_flags: af | of, },
+    { opcode: 0x0FAC, os: 1, e: 1, imm8: 1, mask_flags: af | of, },
+    { opcode: 0x0FAD, os: 1, e: 1, mask_flags: af | of, },
+
+    { opcode: 0x0FAE, e: 1, fixed_g: 0, only_mem: 1, skip: 1, }, // fxsave, ...
+    { opcode: 0x0FAE, e: 1, fixed_g: 1, only_mem: 1, skip: 1, },
+    { opcode: 0x0FAE, e: 1, fixed_g: 2, only_mem: 1, skip: 1, },
+    { opcode: 0x0FAE, e: 1, fixed_g: 3, only_mem: 1, skip: 1, },
+    { opcode: 0x0FAE, e: 1, fixed_g: 4, only_mem: 1, skip: 1, },
+
+    { opcode: 0x0FAE, e: 1, fixed_g: 5, only_reg: 1, skip: 1, }, // lfence (reg, only 0), xrstor (mem)
+    { opcode: 0x0FAE, e: 1, fixed_g: 6, only_reg: 1, skip: 1, }, // mfence (reg, only 0)
+    { opcode: 0x0FAE, e: 1, fixed_g: 7, only_reg: 1, skip: 1, }, // sfence (reg, only 0), clflush (mem)
+
+    { opcode: 0x0FAF, os: 1, e: 1, mask_flags: af | zf }, // imul
+
+    { opcode: 0x0FB0, e: 1 }, // cmpxchg
+    { opcode: 0x0FB1, os: 1, e: 1 },
+    { opcode: 0x0FC7, e: 1, fixed_g: 1, only_mem: 1, }, // cmpxchg8b (memory)
+    { opcode: 0x0FC7, e: 1, fixed_g: 6, only_reg: 1, skip: 1, }, // rdrand
+
+    { opcode: 0x0FB2, os: 1, e: 1, skip: 1, }, // lss, lfs, lgs
+    { opcode: 0x0FB4, os: 1, e: 1, skip: 1, },
+    { opcode: 0x0FB5, os: 1, e: 1, skip: 1, },
+
+    { opcode: 0x0FB6, os: 1, e: 1, }, // movzx
+    { opcode: 0x0FB7, os: 1, e: 1, },
+
+    { opcode: 0xF30FB8, os: 1, e: 1 }, // popcnt
+
+    { opcode: 0x0FBE, os: 1, e: 1, }, // movsx
+    { opcode: 0x0FBF, os: 1, e: 1, },
+
+    { opcode: 0x0FC0, e: 1, }, // xadd
+    { opcode: 0x0FC1, os: 1, e: 1, },
+
+    { opcode: 0x0FC8, }, // bswap
+    { opcode: 0x0FC9, },
+    { opcode: 0x0FCA, },
+    { opcode: 0x0FCB, },
+    { opcode: 0x0FCC, },
+    { opcode: 0x0FCD, },
+    { opcode: 0x0FCE, },
+    { opcode: 0x0FCF, },
+
+
+    // mmx, sse
+    // - Skipped are not implemented
+    // - Missing are sse3+, and floating point
+
+    { opcode: 0x0F12, e: 1, skip: 1, },
+    { opcode: 0x660F12, only_mem: 1, e: 1 },
+    { opcode: 0xF20F12, e: 1, skip: 1, },
+    { opcode: 0xF30F12, e: 1, skip: 1, },
+    { opcode: 0x0F13, only_mem: 1, e: 1 },
+    { opcode: 0x660F13, only_mem: 1, e: 1 },
+    { opcode: 0x0F14, e: 1, skip: 1, },
+    { opcode: 0x660F14, e: 1 },
+
+    { opcode: 0x0F28, e: 1 },
+    { opcode: 0x660F28, e: 1 },
+    { opcode: 0x0F29, e: 1 },
+    { opcode: 0x660F29, e: 1 },
+    { opcode: 0x0F2B, only_mem: 1, e: 1 },
+    { opcode: 0x660F2B, only_mem: 1, e: 1 },
+
+    { opcode: 0xF20F2C, e: 1, },
+    { opcode: 0x0F2C, e: 1, skip: 1, },
+    { opcode: 0xF30F2C, e: 1, skip: 1, },
+    { opcode: 0x660F2C, e: 1, skip: 1, },
+
+    { opcode: 0x0F54, e: 1 },
+    { opcode: 0x660F54, e: 1 },
+    { opcode: 0x0F57, e: 1 },
+    { opcode: 0x660F57, e: 1 },
+
+    { opcode: 0x660F60, e: 1 },
+    { opcode: 0x0F60, e: 1 },
+    { opcode: 0x660F61, e: 1 },
+    { opcode: 0x0F61, e: 1 },
+    { opcode: 0x660F62, e: 1, skip: 1, },
+    { opcode: 0x0F62, e: 1 },
+    { opcode: 0x660F63, e: 1, skip: 1, },
+    { opcode: 0x0F63, e: 1 },
+    { opcode: 0x660F64, e: 1, skip: 1, },
+    { opcode: 0x0F64, e: 1 },
+    { opcode: 0x660F65, e: 1, skip: 1, },
+    { opcode: 0x0F65, e: 1 },
+    { opcode: 0x660F66, e: 1, skip: 1, },
+    { opcode: 0x0F66, e: 1 },
+    { opcode: 0x660F67, e: 1 },
+    { opcode: 0x0F67, e: 1 },
+
+    { opcode: 0x660F68, e: 1 },
+    { opcode: 0x0F68, e: 1 },
+    { opcode: 0x660F69, e: 1, skip: 1, },
+    { opcode: 0x0F69, e: 1 },
+    { opcode: 0x660F6A, e: 1, skip: 1, },
+    { opcode: 0x0F6A, e: 1 },
+    { opcode: 0x660F6B, e: 1, skip: 1, },
+    { opcode: 0x0F6B, e: 1 },
+    { opcode: 0x660F6C, e: 1, skip: 1, },
+    { opcode: 0x660F6D, e: 1, skip: 1, },
+    { opcode: 0x660F6E, e: 1 },
+    { opcode: 0x0F6E, e: 1 },
+    { opcode: 0xF30F6F, e: 1 },
+    { opcode: 0x660F6F, e: 1 },
+    { opcode: 0x0F6F, e: 1 },
+
+    { opcode: 0x0F70, e: 1, imm8: 1, },
+    { opcode: 0x660F70, e: 1, imm8: 1, },
+    { opcode: 0xF20F70, e: 1, imm8: 1, },
+    { opcode: 0xF30F70, e: 1, imm8: 1, },
+
+    { opcode: 0x0F71, e: 1, fixed_g: 2, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F71, e: 1, fixed_g: 2, imm8: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x0F71, e: 1, fixed_g: 4, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F71, e: 1, fixed_g: 4, imm8: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x0F71, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F71, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, skip: 1, },
+
+    { opcode: 0x0F72, e: 1, fixed_g: 2, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F72, e: 1, fixed_g: 2, imm8: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x0F72, e: 1, fixed_g: 4, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F72, e: 1, fixed_g: 4, imm8: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x0F72, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F72, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, skip: 1, },
+
+    { opcode: 0x0F73, e: 1, fixed_g: 2, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F73, e: 1, fixed_g: 2, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F73, e: 1, fixed_g: 3, imm8: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x0F73, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, },
+    { opcode: 0x660F73, e: 1, fixed_g: 6, imm8: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x660F73, e: 1, fixed_g: 7, imm8: 1, only_reg: 1, skip: 1, },
+
+    { opcode: 0x0F74, e: 1, },
+    { opcode: 0x660F74, e: 1, },
+    { opcode: 0x0F75, e: 1, },
+    { opcode: 0x660F75, e: 1, },
+    { opcode: 0x0F76, e: 1, },
+    { opcode: 0x660F76, e: 1, },
+    { opcode: 0x0F77 },
+
+    { opcode: 0x0F7E, e: 1 },
+    { opcode: 0x660F7E, e: 1 },
+    { opcode: 0xF30F7E, e: 1 },
+    { opcode: 0x0F7F, e: 1 },
+    { opcode: 0x660F7F, e: 1 },
+    { opcode: 0xF30F7F, e: 1 },
+
+    { opcode: 0x0FC3, e: 1, only_mem: 1, },
+
+    { opcode: 0x0FC5, e: 1, only_reg: 1, imm8: 1, skip: 1, },
+    { opcode: 0x660FC5, e: 1, only_reg: 1, imm8: 1, },
+
+    { opcode: 0x0FD1, e: 1 },
+    { opcode: 0x660FD1, e: 1, skip: 1, },
+    { opcode: 0x0FD2, e: 1 },
+    { opcode: 0x660FD2, e: 1, skip: 1, },
+    { opcode: 0x0FD3, e: 1 },
+    { opcode: 0x660FD3, e: 1 },
+    { opcode: 0x0FD4, e: 1, skip: 1, },
+    { opcode: 0x660FD4, e: 1, skip: 1, },
+    { opcode: 0x0FD5, e: 1 },
+    { opcode: 0x660FD5, e: 1 },
+
+    { opcode: 0x660FD6, e: 1 },
+    { opcode: 0xF20FD6, e: 1, skip: 1, },
+    { opcode: 0xF30FD6, e: 1, skip: 1, },
+    { opcode: 0x0FD7, e: 1, only_reg: 1, skip: 1, },
+    { opcode: 0x660FD7, e: 1, only_reg: 1, },
+
+    { opcode: 0x0FD8, e: 1 },
+    { opcode: 0x660FD8, e: 1, skip: 1, },
+    { opcode: 0x0FD9, e: 1 },
+    { opcode: 0x660FD9, e: 1, skip: 1, },
+    { opcode: 0x0FDA, e: 1, skip: 1, },
+    { opcode: 0x660FDA, e: 1 },
+    { opcode: 0x0FDB, e: 1 },
+    { opcode: 0x660FDB, e: 1, skip: 1, },
+    { opcode: 0x0FDC, e: 1 },
+    { opcode: 0x660FDC, e: 1 },
+    { opcode: 0x0FDD, e: 1 },
+    { opcode: 0x660FDD, e: 1 },
+    { opcode: 0x0FDE, e: 1, skip: 1, },
+    { opcode: 0x660FDE, e: 1 },
+    { opcode: 0x0FDF, e: 1 },
+    { opcode: 0x660FDF, e: 1, skip: 1, },
+
+    { opcode: 0x0FE0, e: 1, skip: 1, },
+    { opcode: 0x660FE0, e: 1, skip: 1, },
+    { opcode: 0x0FE1, e: 1 },
+    { opcode: 0x660FE1, e: 1, skip: 1, },
+    { opcode: 0x0FE2, e: 1 },
+    { opcode: 0x660FE2, e: 1, skip: 1, },
+    { opcode: 0x0FE3, e: 1, skip: 1, },
+    { opcode: 0x660FE3, e: 1, skip: 1, },
+    { opcode: 0x0FE4, e: 1, skip: 1, },
+    { opcode: 0x660FE4, e: 1 },
+    { opcode: 0x0FE5, e: 1 },
+    { opcode: 0x660FE5, e: 1, skip: 1, },
+
+    { opcode: 0x660FE6, e: 1, skip: 1, },
+    { opcode: 0xF20FE6, e: 1, skip: 1, },
+    { opcode: 0xF30FE6, e: 1, skip: 1, },
+    { opcode: 0x0FE7, e: 1, only_mem: 1, skip: 1, },
+    { opcode: 0x660FE7, e: 1, only_mem: 1, },
+
+    { opcode: 0x0FE8, e: 1 },
+    { opcode: 0x660FE8, e: 1, skip: 1, },
+    { opcode: 0x0FE9, e: 1 },
+    { opcode: 0x660FE9, e: 1, skip: 1, },
+    { opcode: 0x0FEA, e: 1, skip: 1, },
+    { opcode: 0x660FEA, e: 1, skip: 1, },
+    { opcode: 0x0FEB, e: 1 },
+    { opcode: 0x660FEB, e: 1 },
+    { opcode: 0x0FEC, e: 1 },
+    { opcode: 0x660FEC, e: 1, skip: 1, },
+    { opcode: 0x0FED, e: 1 },
+    { opcode: 0x660FED, e: 1, skip: 1, },
+    { opcode: 0x0FEE, e: 1, skip: 1, },
+    { opcode: 0x660FEE, e: 1, skip: 1, },
+    { opcode: 0x0FEF, e: 1 },
+    { opcode: 0x660FEF, e: 1 },
+
+    { opcode: 0x0FF1, e: 1 },
+    { opcode: 0x660FF1, e: 1, skip: 1, },
+    { opcode: 0x0FF2, e: 1 },
+    { opcode: 0x660FF2, e: 1, skip: 1, },
+    { opcode: 0x0FF3, e: 1 },
+    { opcode: 0x660FF3, e: 1, },
+    { opcode: 0x0FF4, e: 1, skip: 1, },
+    { opcode: 0x660FF4, e: 1, skip: 1, },
+    { opcode: 0x0FF5, e: 1 },
+    { opcode: 0x660FF5, e: 1, skip: 1, },
+    { opcode: 0x0FF6, e: 1, skip: 1, },
+    { opcode: 0x660FF6, e: 1, skip: 1, },
+    { opcode: 0x0FF7, e: 1, skip: 1, },
+    { opcode: 0x660FF7, e: 1, skip: 1, },
+
+    { opcode: 0x0FF8, e: 1 },
+    { opcode: 0x660FF8, e: 1, skip: 1, },
+    { opcode: 0x0FF9, e: 1 },
+    { opcode: 0x660FF9, e: 1, skip: 1, },
+    { opcode: 0x0FFA, e: 1 },
+    { opcode: 0x660FFA, e: 1 },
+    { opcode: 0x0FFB, e: 1, skip: 1, },
+    { opcode: 0x660FFB, e: 1, skip: 1, },
+    { opcode: 0x0FFC, e: 1 },
+    { opcode: 0x660FFC, e: 1, skip: 1, },
+    { opcode: 0x0FFD, e: 1 },
+    { opcode: 0x660FFD, e: 1, skip: 1, },
+    { opcode: 0x0FFE, e: 1 },
+    { opcode: 0x660FFE, e: 1, skip: 1, },
+];
+// Encodings that come in families of eight: i = 0..7 is used either as part of the opcode or as the `fixed_g` value
+for(let i = 0; i < 8; i++)
+{
+    encodings.push.apply(encodings, [
+        { opcode: 0x00 | i << 3, e: 1, }, // `<<` binds tighter than `|`: opcode is 0x00 + 8 * i
+        { opcode: 0x01 | i << 3, os: 1, e: 1, },
+        { opcode: 0x02 | i << 3, e: 1, },
+        { opcode: 0x03 | i << 3, os: 1, e: 1, },
+        { opcode: 0x04 | i << 3, eax: 1, imm8: 1, },
+        { opcode: 0x05 | i << 3, os: 1, eax: 1, imm1632: 1, },
+
+        { opcode: 0x70 | i, imm8s: 1, skip: 1, }, // 0x70..0x77
+        { opcode: 0x78 | i, imm8s: 1, skip: 1, }, // 0x78..0x7F
+
+        { opcode: 0x80, e: 1, fixed_g: i, imm8: 1, }, // same opcode, one row per fixed_g value
+        { opcode: 0x81, os: 1, e: 1, fixed_g: i, imm1632: 1, },
+        { opcode: 0x82, e: 1, fixed_g: i, imm8: 1, },
+        { opcode: 0x83, os: 1, e: 1, fixed_g: i, imm8s: 1, },
+
+        { opcode: 0xB0 | i, imm8: 1, }, // 0xB0..0xB7
+        { opcode: 0xB8 | i, os: 1, imm1632: 1, }, // 0xB8..0xBF
+
+        // note: overflow flag only undefined if shift is > 1
+        // note: the adjust flag is undefined for shifts > 0 and unaffected by rotates
+        { opcode: 0xC0, e: 1, fixed_g: i, imm8: 1, mask_flags: of | af, },
+        { opcode: 0xC1, os: 1, e: 1, fixed_g: i, imm8: 1, mask_flags: of | af, },
+        { opcode: 0xD0, e: 1, fixed_g: i, mask_flags: af, },
+        { opcode: 0xD1, os: 1, e: 1, fixed_g: i, mask_flags: af, },
+        { opcode: 0xD2, e: 1, fixed_g: i, mask_flags: of | af, },
+        { opcode: 0xD3, os: 1, e: 1, fixed_g: i, mask_flags: of | af, },
+    ]);
+}
+// Order the table by opcode (0F xx opcodes compare by their low 16 bits, so a leading 66/F2/F3 byte is ignored), then by fixed_g
+encodings.sort((e1, e2) => {
+    let o1 = (e1.opcode & 0xFF00) === 0x0F00 ? e1.opcode & 0xFFFF : e1.opcode & 0xFF; // 0F xx: keep both bytes; otherwise only the low byte
+    let o2 = (e2.opcode & 0xFF00) === 0x0F00 ? e2.opcode & 0xFFFF : e2.opcode & 0xFF;
+    return o1 - o2 || e1.fixed_g - e2.fixed_g; // NOTE(review): the second term is NaN when either fixed_g is undefined — confirm rows with equal opcode need no further ordering
+});
+
+module.exports = Object.freeze(encodings); // shallow freeze: the array is immutable, the row objects are not

+ 3 - 2
index.html

@@ -156,11 +156,12 @@
 <div id="screen_container" style="display: none">
     <div id="screen"></div>
     <canvas id="vga"></canvas>
+    <div style="position: absolute; top: 0; z-index: 10">
+        <textarea class="phone_keyboard"></textarea>
+    </div>
 </div>
 
 
-<input type="text" class="phone_keyboard" style="display: none">
-
 <div id="runtime_infos" style="display: none">
     Running: <span id="running_time">0s</span> <br>
     Speed: <span id="speed">0</span>kIPS<br>

+ 1 - 1
loader.js

@@ -5,7 +5,7 @@
     "use strict";
 
     var CORE_FILES =
-        "const.js config.js log.js cpu.js debug.js translate.js modrm.js string.js arith.js misc_instr.js instructions.js " +
+        "const.js config.js log.js cpu.js debug.js translate.js modrm.js string.js arith.js misc_instr.js instructions.js codegen.js " +
         "io.js main.js lib.js ide.js fpu.js pci.js floppy.js " +
         "memory.js dma.js pit.js vga.js ps2.js pic.js rtc.js uart.js acpi.js apic.js ioapic.js hpet.js " +
         "ne2k.js state.js virtio.js bus.js elf.js";

+ 44 - 0
src/browser/lib.js

@@ -58,6 +58,50 @@ var ASYNC_SAFE = false;
         });
     };
 
+    v86util.add_download_button = function(data, filename) // append a "download" button to <body> that saves `data` as `filename` when clicked
+    {
+        let b = document.createElement("button");
+        b.textContent = "download";
+        document.body.appendChild(b);
+        b.onclick = function()
+        {
+            dump_file(data, filename); // `data` is captured now and only turned into a Blob on click
+        };
+    };
+
+    function dump_file(ab, name) // offer `ab` (a single Blob part or an array of parts) as a download named `name`
+    {
+        if(!(ab instanceof Array))
+        {
+            ab = [ab]; // the Blob constructor takes a sequence of parts, so wrap a lone part
+        }
+
+        var blob = new Blob(ab);
+        download(blob, name);
+    }
+
+    function download(file_or_blob, name) // trigger a browser download of `file_or_blob` under the file name `name`
+    {
+        var a = document.createElement("a"); // temporary link; it is never attached to the document
+        a["download"] = name;
+        a.href = window.URL.createObjectURL(file_or_blob);
+        a.dataset["downloadurl"] = ["application/octet-stream", a["download"], a.href].join(":");
+
+        if(document.createEvent) // dispatch a synthetic click where createEvent exists, otherwise fall back to a.click()
+        {
+            var ev = document.createEvent("MouseEvent");
+            ev.initMouseEvent("click", true, true, window,
+                0, 0, 0, 0, 0, false, false, false, false, 0, null);
+            a.dispatchEvent(ev);
+        }
+        else
+        {
+            a.click();
+        }
+
+        window.URL.revokeObjectURL(a.href); // release the object URL created above
+    }
+
     /**
      * @param {string} filename
      * @param {Object} options

+ 13 - 2
src/browser/main.js

@@ -787,8 +787,6 @@
         $("runtime_infos").style.display = "block";
         $("screen_container").style.display = "block";
 
-        document.getElementsByClassName("phone_keyboard")[0].style.display = "block";
-
         if(settings.filesystem)
         {
             init_filesystem_panel(emulator);
@@ -1197,6 +1195,19 @@
             }
         };
 
+        const phone_keyboard = document.getElementsByClassName("phone_keyboard")[0];
+
+        phone_keyboard.setAttribute("autocorrect", "off");
+        phone_keyboard.setAttribute("autocapitalize", "off");
+        phone_keyboard.setAttribute("spellcheck", "false");
+        phone_keyboard.tabIndex = 0;
+
+        $("screen_container").addEventListener("mousedown", (e) =>
+        {
+            e.preventDefault();
+            phone_keyboard.focus();
+        }, false);
+
         $("take_screenshot").onclick = function()
         {
             emulator.screen_make_screenshot();

+ 1 - 1
src/browser/screen.js

@@ -22,7 +22,7 @@ function ScreenAdapter(screen_container, bus)
         graphic_screen = screen_container.getElementsByTagName("canvas")[0],
         graphic_context = graphic_screen.getContext("2d"),
 
-        text_screen = graphic_screen.nextElementSibling || graphic_screen.previousElementSibling,
+        text_screen = screen_container.getElementsByTagName("div")[0],
         cursor_element = document.createElement("div");
 
     var

+ 35 - 21
src/browser/starter.js

@@ -97,18 +97,28 @@ function V86Starter(options)
     var mem;
     var mem8;
     var wasm_shared_funcs = {
-        "_throw_cpu_exception": () => { throw MAGIC_CPU_EXCEPTION; },
+        "_throw_cpu_exception": () => {
+            throw MAGIC_CPU_EXCEPTION;
+        },
+        "_cpu_exception_hook": (n) => {
+            return this["cpu_exception_hook"] && this["cpu_exception_hook"](n);
+        },
         "_hlt_op": function() { return cpu.hlt_op(); },
         "abort": function() { dbg_assert(false); },
         "_dbg_assert": function() { return cpu.dbg_assert.apply(cpu, arguments); },
         "_dbg_log": function() { return cpu.dbg_log.apply(cpu, arguments); },
+        "_dbg_trace": function() { return dbg_trace(); },
+        "_logop": function(eip, op) { return cpu.debug.logop(eip, op); },
         "_todo": function() { return cpu.todo.apply(cpu, arguments); },
         "_undefined_instruction": function() { return cpu.undefined_instruction.apply(cpu, arguments); },
-        "_unimplemented_sse": function() { return cpu.unimplemented_sse_wasm(); },
+        "_unimplemented_sse": function() { return cpu.unimplemented_sse(); },
         "_microtick": function() { return v86.microtick(); },
         "_get_rand_int": function() { return v86util.get_rand_int(); },
         "_has_rand_int": function() { return v86util.has_rand_int(); },
-        "_printf": function(offset) { dbg_log_wasm(mem, offset, [].slice.call(arguments, 1)); },
+        "_printf": function(format_string_offset, stack_top) {
+            dbg_assert(arguments.length === 2);
+            dbg_log_wasm(mem, format_string_offset, stack_top);
+        },
         "_memcpy_large": function(dest, source, length) {
             mem8.set(mem8.subarray(source, source + length), dest);
             return dest;
@@ -122,6 +132,11 @@ function V86Starter(options)
         "_switch_seg": function(reg, selector) { cpu.switch_seg(reg, selector); },
         "_iret16": function() { return cpu.iret16(); },
         "_iret32": function() { return cpu.iret32(); },
+        "_handle_irqs": function() { return cpu.handle_irqs(); },
+
+        //XXX: These are temporary for as long as we are testing the JIT
+        "_resolve_modrm16": function(modrm_byte) { return cpu.resolve_modrm16(modrm_byte); },
+        "_resolve_modrm32": function(modrm_byte) { return cpu.resolve_modrm32(modrm_byte); },
 
         "_io_port_read8": function(addr) { return cpu.io.port_read8(addr); },
         "_io_port_read16": function(addr) { return cpu.io.port_read16(addr); },
@@ -160,22 +175,12 @@ function V86Starter(options)
         "_math_pow": function(x, y) { return Math.pow(x, y); },
 
         "_do_page_translation": function() { return cpu.do_page_translation.apply(cpu, arguments); },
-        "_read_reg_e16": function() { return cpu.read_reg_e16.apply(cpu, arguments); },
-        "_read_reg_e32s": function() { return cpu.read_reg_e32s.apply(cpu, arguments); },
-        "_write_reg_e16": function() { return cpu.write_reg_e16.apply(cpu, arguments); },
-        "_write_reg_e32": function() { return cpu.write_reg_e32.apply(cpu, arguments); },
         "_popa16": function() { return cpu.popa16.apply(cpu, arguments); },
         "_popa32": function() { return cpu.popa32.apply(cpu, arguments); },
         "_arpl": function() { return cpu.arpl.apply(cpu, arguments); },
-        "_trigger_ud": function() { return cpu.trigger_ud.apply(cpu, arguments); },
-        "_trigger_nm": function() { return cpu.trigger_nm.apply(cpu, arguments); },
-        "_virt_boundary_read16": function() { return cpu.virt_boundary_read16.apply(cpu, arguments); },
-        "_virt_boundary_read32s": function() { return cpu.virt_boundary_read32s.apply(cpu, arguments); },
-        "_virt_boundary_write16": function() { return cpu.virt_boundary_write16.apply(cpu, arguments); },
-        "_virt_boundary_write32": function() { return cpu.virt_boundary_write32.apply(cpu, arguments); },
         "_getiopl": function() { return cpu.getiopl.apply(cpu, arguments); },
         "_vm86_mode": function() { return cpu.vm86_mode.apply(cpu, arguments); },
-        
+
         "_bswap": function() { return cpu.bswap.apply(cpu, arguments); },
 
         "_lar": function() { return cpu.lar.apply(cpu, arguments); },
@@ -200,12 +205,6 @@ function V86Starter(options)
         "_enter16": function() { return cpu.enter16.apply(cpu, arguments); },
         "_enter32": function() { return cpu.enter32.apply(cpu, arguments); },
         "_update_eflags": function() { return cpu.update_eflags.apply(cpu, arguments); },
-        "_handle_irqs": function() { return cpu.handle_irqs.apply(cpu, arguments); },
-        "_xchg8": function() { return cpu.xchg8.apply(cpu, arguments); },
-        "_xchg16": function() { return cpu.xchg16.apply(cpu, arguments); },
-        "_xchg16r": function() { return cpu.xchg16r.apply(cpu, arguments); },
-        "_xchg32": function() { return cpu.xchg32.apply(cpu, arguments); },
-        "_xchg32r": function() { return cpu.xchg32r.apply(cpu, arguments); },
         "_loop": function() { return cpu.loop.apply(cpu, arguments); },
         "_loope": function() { return cpu.loope.apply(cpu, arguments); },
         "_loopne": function() { return cpu.loopne.apply(cpu, arguments); },
@@ -214,6 +213,20 @@ function V86Starter(options)
         "_jcxz": function() { return cpu.jcxz.apply(cpu, arguments); },
         "_test_privileges_for_io": function() { return cpu.test_privileges_for_io.apply(cpu, arguments); },
 
+        "_convert_f64_to_i32": function(f) {
+            // implemented here due to emscripten bug
+            if(!(f <= 0x7FFFFFFF && f >= -0x80000000))
+            {
+                f = 0x80000000 | 0;
+            }
+
+            return f | 0;
+        },
+        "_get_time": () => Date.now(),
+        // XXX: Closure compiler hack; these functions are actually wasm exports
+        "_jit_empty_cache": () => {},
+        "_jit_dirty_cache": () => {},
+        "_after_jump": () => {},
     };
 
     let wasm_file = DEBUG ? "v86-debug.wasm" : "v86.wasm";
@@ -229,7 +242,7 @@ function V86Starter(options)
 
     v86util.load_wasm(wasm_file, { 'env': wasm_shared_funcs }, wm => {
         wm.instance.exports["__post_instantiate"]();
-        emulator = this.v86 = new v86(this.emulator_bus, wm);
+        emulator = this.v86 = new v86(this.emulator_bus, wm, new Codegen(wm));
         cpu = emulator.cpu;
         mem = wm.mem.buffer;
         mem8 = new Uint8Array(mem);
@@ -255,6 +268,7 @@ function V86Starter(options)
     };
 
     settings.load_devices = true;
+    settings.log_level = options["log_level"];
     settings.memory_size = options["memory_size"] || 64 * 1024 * 1024;
     settings.vga_memory_size = options["vga_memory_size"] || 8 * 1024 * 1024;
     settings.boot_order = options["boot_order"] || 0x213;

+ 91 - 0
src/codegen.js

@@ -0,0 +1,91 @@
+/** @constructor */
+function Codegen(wm)
+{
+    this.wm = wm;
+    this.wm.funcs['_gen_init']();
+}
+
+Codegen.prototype.reset = function()
+{
+    this.wm.funcs['_gen_reset']();
+}
+
+Codegen.OUTPUT_OFFSET = 2048 + 0x100000 * 6;
+Codegen.STR_INPUT_OFFSET = Codegen.OUTPUT_OFFSET + 1024 - 32;
+
+Codegen.prototype.str_input = function(str)
+{
+    if (str.length > 32) {
+        throw new Error('Max string length for crossing boundary is 32');
+    }
+    const view = new Uint8Array(this.wm.mem.buffer, Codegen.STR_INPUT_OFFSET, 32);
+    for (let i = 0; i < str.length; i++)
+    {
+        view[i] = str.charCodeAt(i);
+    }
+};
+
+Codegen.prototype.fn0 = function(fn)
+{
+    this.str_input(fn);
+    this.wm.funcs['_gen_fn0'](Codegen.STR_INPUT_OFFSET, fn.length);
+};
+
+Codegen.prototype.fn1 = function(fn, arg0)
+{
+    this.str_input(fn);
+    this.wm.funcs['_gen_fn1'](Codegen.STR_INPUT_OFFSET, fn.length, arg0);
+};
+
+Codegen.prototype.fn2 = function(fn, arg0, arg1)
+{
+    this.str_input(fn);
+    this.wm.funcs['_gen_fn2'](Codegen.STR_INPUT_OFFSET, fn.length, arg0, arg1);
+};
+
+Codegen.prototype.modrm_fn0 = function(fn, modrm_byte, arg)
+{
+    this.str_input(fn);
+    this.wm.funcs['_gen_modrm_fn0'](Codegen.STR_INPUT_OFFSET, fn.length, modrm_byte, arg);
+};
+
+Codegen.prototype.modrm_fn1 = function(fn, modrm_byte)
+{
+    this.str_input(fn);
+    this.wm.funcs['_gen_modrm_fn1'](Codegen.STR_INPUT_OFFSET, fn.length, modrm_byte);
+};
+
+Codegen.prototype.jit_resolve_modrm16 = function(modrm_byte)
+{
+    this.wm.funcs['_gen_resolve_modrm16'](modrm_byte);
+};
+
+Codegen.prototype.jit_resolve_modrm32 = function(modrm_byte)
+{
+    this.wm.funcs['_gen_resolve_modrm32'](modrm_byte);
+};
+
+Codegen.prototype.increment_instruction_pointer = function(n)
+{
+    this.wm.funcs['_gen_increment_instruction_pointer'](n);
+};
+
+Codegen.prototype.set_previous_eip = function()
+{
+    this.wm.funcs['_gen_set_previous_eip']();
+};
+
+Codegen.prototype.finish = function()
+{
+    return this.wm.funcs['_gen_finish']();
+};
+
+Codegen.prototype.get_module_code = function()
+{
+    const final_offset = this.wm.funcs['_gen_get_final_offset']();
+
+    // extract wasm module
+    const output_buffer_view = new Uint8Array(this.wm.mem.buffer, Codegen.OUTPUT_OFFSET, final_offset - Codegen.OUTPUT_OFFSET);
+    return output_buffer_view;
+};
+

+ 5 - 0
src/const.js

@@ -349,3 +349,8 @@ var PREFIX_66 = PREFIX_MASK_OPSIZE; // alias
 
 /** @const */
 var MXCSR_MASK = (0xFFFF & ~(1 << 6));
+
+/** @const */
+const P_IDLE = 0;
+/** @const */
+const P_DO_MANY_CYCLES = 2;

+ 150 - 18
src/cpu.js

@@ -11,9 +11,10 @@ var CPU_LOG_VERBOSE = false;
 
 
 /** @constructor */
-function CPU(bus, wm)
+function CPU(bus, wm, codegen)
 {
     this.wm = wm;
+    this.codegen = codegen;
     this.wasm_patch(wm);
 
     this.memory_size = new Uint32Array(wm.mem.buffer, 812, 1);
@@ -182,7 +183,7 @@ function CPU(bus, wm)
         vga: null,
     };
 
-    this.timestamp_counter = new Int32Array(wm.mem.buffer, 664, 1);
+    this.timestamp_counter = new Uint32Array(wm.mem.buffer, 664, 1);
 
     // registers
     this.reg32s = new Int32Array(wm.mem.buffer, 4, 8);
@@ -198,6 +199,9 @@ function CPU(bus, wm)
     this.reg_mmx8s = new Int8Array(this.reg_mmxs.buffer, 1064, 64);
     this.reg_mmx8 = new Uint8Array(this.reg_mmxs.buffer, 1064, 64);
 
+    this.cache_hit = new Uint32Array(wm.mem.buffer, 1280, 1);
+    this.cache_compile = new Uint32Array(wm.mem.buffer, 1284, 1);
+
     this.reg_xmm32s = new Int32Array(wm.mem.buffer, 828, 8 * 4);
 
     this.mxcsr = new Int32Array(wm.mem.buffer, 824, 1);
@@ -373,6 +377,88 @@ CPU.prototype.wasm_patch = function(wm)
     this.fxrstor = this.wm.funcs['_fxrstor'];
 };
 
+//*
+CPU.prototype.resolve_modrm16 = function(modrm_byte)
+{
+    dbg_log("resolve_modrm16, modrm_byte=" + h(modrm_byte));
+
+    let buf;
+    const RESULT_LOC = 1600;
+
+    try {
+        const gen = this.codegen;
+        gen.reset();
+        gen.jit_resolve_modrm16(modrm_byte);
+        gen.finish();
+        buf = gen.get_module_code();
+
+        //XXX: move the following logic to a separate function
+        const module = new WebAssembly.Module(buf);
+        const imports = { "e": {
+            "get_seg": v => this.get_seg(v),
+            "get_reg": v => v, //XXX: no get_reg on CPU :|
+            "get_seg_prefix_ds": v => this.get_seg_prefix_ds(v),
+            "get_seg_prefix_ss": v => this.get_seg_prefix_ss(v),
+            "get_seg_prefix": v => this.get_seg_prefix(v),
+            "m": this.wm.mem,
+        } };
+        const o = new WebAssembly.Instance(module, imports);
+        o.exports["f"]();
+        const view = new Int32Array(this.wm.mem.buffer, RESULT_LOC, 4);
+        return view[0];
+    }
+    catch(err)
+    {
+        if (typeof buf !== "undefined")
+        {
+            v86util.add_download_button(buf, "myjit.wasm");
+        }
+        throw err;
+    }
+};
+//*/
+
+//*
+CPU.prototype.resolve_modrm32 = function(modrm_byte)
+{
+    dbg_log("resolve_modrm32, modrm_byte=" + h(modrm_byte));
+
+    let buf;
+    const RESULT_LOC = 1600;
+
+    try {
+        const gen = this.codegen;
+        gen.reset();
+        gen.jit_resolve_modrm32(modrm_byte);
+        gen.finish();
+        buf = gen.get_module_code();
+
+        //XXX: move the following logic to a separate function
+        const module = new WebAssembly.Module(buf);
+        const imports = { "e": {
+            "get_seg": v => this.get_seg(v),
+            "get_reg": v => v, //XXX: no get_reg on CPU :|
+            "get_seg_prefix_ds": v => this.get_seg_prefix_ds(v),
+            "get_seg_prefix_ss": v => this.get_seg_prefix_ss(v),
+            "get_seg_prefix": v => this.get_seg_prefix(v),
+            "m": this.wm.mem,
+        } };
+        const o = new WebAssembly.Instance(module, imports);
+        o.exports["f"]();
+        const view = new Int32Array(this.wm.mem.buffer, RESULT_LOC, 4);
+        return view[0];
+    }
+    catch(err)
+    {
+        if (typeof buf !== "undefined")
+        {
+            v86util.add_download_button(buf, "myjit.wasm");
+        }
+        throw err;
+    }
+};
+//*/
+
 CPU.prototype.get_state = function()
 {
     var state = [];
@@ -701,9 +787,9 @@ CPU.prototype.create_memory = function(size)
 
     var buffer = this.wm.mem.buffer;
 
-    this.mem8 = new Uint8Array(buffer, 2048 + 0x100000 * 6, size);
-    this.mem16 = new Uint16Array(buffer, 2048 + 0x100000 * 6, size >> 1);
-    this.mem32s = new Int32Array(buffer, 2048 + 0x100000 * 6, size >> 2);
+    this.mem8 = new Uint8Array(buffer, 2048 + 0x100000 * 6 + 2048, size);
+    this.mem16 = new Uint16Array(buffer, 2048 + 0x100000 * 6 + 2048, size >> 1);
+    this.mem32s = new Int32Array(buffer, 2048 + 0x100000 * 6 + 2048, size >> 2);
 };
 
 CPU.prototype.init = function(settings, device_bus)
@@ -713,6 +799,12 @@ CPU.prototype.init = function(settings, device_bus)
 
     this.reset();
 
+    if(typeof settings.log_level === "number")
+    {
+        // XXX: Shared between all emulator instances
+        LOG_LEVEL = settings.log_level;
+    }
+
     var io = new IO(this);
     this.io = io;
 
@@ -857,6 +949,8 @@ CPU.prototype.init = function(settings, device_bus)
     {
         this.debug.init();
     }
+
+    this.wm.funcs["_profiler_init"]();
 };
 
 CPU.prototype.load_multiboot = function(buffer)
@@ -1161,6 +1255,9 @@ CPU.prototype.load_bios = function()
 
 CPU.prototype.do_run = function()
 {
+    // Idle time is when no instructions are being executed
+    this.wm.funcs["_profiler_end"](P_IDLE);
+
     /** @type {number} */
     var start = v86.microtick();
 
@@ -1183,10 +1280,15 @@ CPU.prototype.do_run = function()
 
         now = v86.microtick();
     }
+
+    this.wm.funcs["_profiler_start"](P_IDLE);
 };
 
 CPU.prototype.do_many_cycles = function()
 {
+    // Capture the total time we were executing instructions
+    this.wm.funcs["_profiler_start"](P_DO_MANY_CYCLES);
+
     try {
         this.do_many_cycles_unsafe();
     }
@@ -1194,6 +1296,8 @@ CPU.prototype.do_many_cycles = function()
     {
         this.exception_cleanup(e);
     }
+
+    this.wm.funcs["_profiler_end"](P_DO_MANY_CYCLES);
 };
 
 CPU.prototype.do_many_cycles_unsafe = function()
@@ -1419,12 +1523,15 @@ CPU.prototype.set_cr0 = function(cr0)
     }
 
     this.protected_mode[0] = +((this.cr[0] & CR0_PE) === CR0_PE);
+
+    this.wm.funcs._jit_empty_cache();
 };
 
 CPU.prototype.set_cr4 = function(cr4)
 {
     if(cr4 & (1 << 11 | 1 << 12 | 1 << 15 | 1 << 16 | 1 << 19 | 0xFFC00000))
     {
+        dbg_log("trigger_gp: Invalid cr4 bit", LOG_CPU);
         this.trigger_gp(0);
     }
 
@@ -1553,9 +1660,20 @@ CPU.prototype.read_disp16 = CPU.prototype.read_imm16;
 CPU.prototype.read_disp32s = CPU.prototype.read_imm32s;
 
 CPU.prototype.init2 = function () {};
-CPU.prototype.branch_taken = function () {};
-CPU.prototype.branch_not_taken = function () {};
-CPU.prototype.diverged = function () {};
+
+// Forwards to the `_after_jump` function found on wm.funcs.
+CPU.prototype.after_jump = function () {
+    // May be called through JS imports in the WASM module, such as loop or handle_irqs (through popf, sti)
+    this.wm.funcs._after_jump();
+};
+// The three hooks below all simply delegate to after_jump.
+CPU.prototype.branch_taken = function () {
+    this.after_jump();
+};
+CPU.prototype.branch_not_taken = function () {
+    this.after_jump();
+};
+CPU.prototype.diverged = function () {
+    this.after_jump();
+};
 
 CPU.prototype.modrm_resolve = function(modrm_byte)
 {
@@ -3240,6 +3358,7 @@ CPU.prototype.hlt_op = function()
 {
     if(this.cpl[0])
     {
+        dbg_log("#gp hlt with cpl != 0", LOG_CPU);
         this.trigger_gp(0);
     }
 
@@ -3389,13 +3508,6 @@ CPU.prototype.unimplemented_sse = function()
     this.trigger_ud();
 };
 
-CPU.prototype.unimplemented_sse_wasm = function()
-{
-    this.instruction_pointer[0] -= 1;
-
-    this.table0F_32[this.read_op0F()](this);
-};
-
 CPU.prototype.get_seg_prefix_ds = function(offset)
 {
     offset = offset || 0;
@@ -3930,9 +4042,10 @@ CPU.prototype.cpuid = function()
     var edx = 0;
     var ebx = 0;
 
-    var winnt_fix = false;
+    const winnt_fix = false;
+    const level = this.reg32s[reg_eax];
 
-    switch(this.reg32s[reg_eax])
+    switch(level)
     {
         case 0:
             // maximum supported level
@@ -4029,7 +4142,14 @@ CPU.prototype.cpuid = function()
             dbg_log("cpuid: unimplemented eax: " + h(this.reg32[reg_eax]), LOG_CPU);
     }
 
-    dbg_log("cpuid: eax=" + h(this.reg32[reg_eax], 8) + " cl=" + h(this.reg8[reg_cl], 2), LOG_CPU);
+    if(level === 4)
+    {
+        dbg_log("cpuid: eax=" + h(this.reg32[reg_eax], 8) + " cl=" + h(this.reg8[reg_cl], 2), LOG_CPU);
+    }
+    else
+    {
+        dbg_log("cpuid: eax=" + h(this.reg32[reg_eax], 8), LOG_CPU);
+    }
 
     this.reg32s[reg_eax] = eax;
     this.reg32s[reg_ecx] = ecx;
@@ -4403,6 +4523,12 @@ CPU.prototype.lar = function(selector, original)
 {
     dbg_log("lar sel=" + h(selector, 4), LOG_CPU);
 
+    if(!this.protected_mode[0] || this.vm86_mode())
+    {
+        dbg_log("lar #ud");
+        this.trigger_ud();
+    }
+
     /** @const */
     var LAR_INVALID_TYPE = 1 << 0 | 1 << 6 | 1 << 7 | 1 << 8 | 1 << 0xA |
                            1 << 0xD | 1 << 0xE | 1 << 0xF;
@@ -4431,6 +4557,12 @@ CPU.prototype.lsl = function(selector, original)
 {
     dbg_log("lsl sel=" + h(selector, 4), LOG_CPU);
 
+    if(!this.protected_mode[0] || this.vm86_mode())
+    {
+        dbg_log("lsl #ud");
+        this.trigger_ud();
+    }
+
     /** @const */
     var LSL_INVALID_TYPE = 1 << 0 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7 | 1 << 8 |
                            1 << 0xA | 1 << 0xC | 1 << 0xD | 1 << 0xE | 1 << 0xF;

+ 2 - 0
src/debug.js

@@ -177,6 +177,8 @@ CPU.prototype.debug_init = function()
             return;
         }
 
+        _ip = _ip >>> 0;
+
         if(debug.trace_all && debug.all_ops)
         {
             debug.all_ops.push(_ip, op);

+ 3 - 3
src/elf.js

@@ -106,9 +106,9 @@ function read_elf(buffer)
     {
         for(let key in header)
         {
-            console.log(key + ": 0x" + header[key].toString(16));
+            dbg_log(key + ": 0x" + header[key].toString(16));
         }
-        console.log(header);
+        dbg_log(header);
     }
 
     console.assert(header.magic === ELF_MAGIC, "Bad magic");
@@ -137,7 +137,7 @@ function read_elf(buffer)
         SectionHeader,
         header.shnum);
 
-    if(DEBUG)
+    if(DEBUG && LOG_LEVEL)
     {
         console.log("%d program headers:", program_headers.length);
         for(let program of program_headers)

+ 1 - 1
src/externs.js

@@ -14,4 +14,4 @@ var exports = {};
 var define = {};
 var module = {};
 
-var WebAssembly = { Memory() {}, Table() {}, instantiate() {}, compile() {} };
+var WebAssembly = { Memory() {}, Table() {}, instantiate() {}, compile() {}, Instance() {}, Module() {} };

+ 2 - 2
src/ide.js

@@ -151,7 +151,7 @@ function IDEDevice(cpu, buffer, is_cd, nr, bus)
     {
         dbg_log("Read error: " + h(this.current_interface.error & 0xFF) +
                 " slave=" + (this.current_interface === this.slave), LOG_DISK);
-        return this.current_interface.error;
+        return this.current_interface.error & 0xFF;
     });
     cpu.io.register_read(this.ata_port | 2, this, function()
     {
@@ -177,7 +177,7 @@ function IDEDevice(cpu, buffer, is_cd, nr, bus)
     cpu.io.register_read(this.ata_port | 6, this, function()
     {
         dbg_log("Read 1F6", LOG_DISK);
-        return this.current_interface.drive_head;
+        return this.current_interface.drive_head & 0xFF;
     });
 
     cpu.io.register_write(this.ata_port | 0, this, function(data)

+ 34 - 9
src/log.js

@@ -87,25 +87,50 @@ var dbg_log = (function()
     return dbg_log_;
 })();
 
-function dbg_log_wasm(memory, offset, args)
+/**
+ * Formats a log message whose format string and arguments live in `memory`
+ * and passes it to dbg_log with LOG_CPU.
+ *
+ * The format string is read byte-wise from `format_string_offset` up to the
+ * first 0 byte. Each %x or %d consumes one 32-bit value read from `memory`,
+ * starting at byte offset `stack` and advancing by 4 per argument; %% yields a
+ * literal percent sign. Does nothing unless DEBUG is set and LOG_CPU is
+ * enabled in LOG_LEVEL.
+ */
+function dbg_log_wasm(memory, format_string_offset, stack)
 {
-    if(!(LOG_LEVEL & LOG_CPU))
+    if(!DEBUG || !(LOG_LEVEL & LOG_CPU))
     {
         return;
     }
 
-    let s = new Uint8Array(memory, offset);
-    let length = s.indexOf(0);
+    let s = new Uint8Array(memory, format_string_offset);
+
+    // Cut the view at the first 0 byte; if none is found the view is left as is
+    const length = s.indexOf(0);
     if(length !== -1)
     {
-        s = new Uint8Array(memory, offset, length);
+        s = new Uint8Array(memory, format_string_offset, length);
+    }
+
+    // Reads the 32-bit value at `stack` and advances `stack` by 4 bytes
+    function pop_arg()
+    {
+        const arg = new Int32Array(memory, stack, 1)[0];
+        stack += 4;
+        return arg;
     }
 
-    let format_string = "[CPU ] " + String.fromCharCode.apply(String, s);
-    let format_args = [format_string];
-    format_args.push.apply(format_args, args);
+    let format_string = String.fromCharCode.apply(String, s);
+    // Arguments are popped in the order their specifiers appear in the string
+    format_string = format_string.replace(/%([xd%])/g, function(full_match, identifier)
+        {
+            if(identifier === "x")
+            {
+                // >>> 0 makes negative values print as unsigned hex
+                return (pop_arg() >>> 0).toString(16);
+            }
+            else if(identifier === "d")
+            {
+                return pop_arg().toString(10);
+            }
+            else if(identifier === "%")
+            {
+                return "%";
+            }
+            else
+            {
+                // The regex only captures x, d or %, so this branch is not expected to run
+                console.assert(false);
+            }
+        });
 
-    console.log.apply(console, format_args);
+    dbg_log(format_string, LOG_CPU);
 }
 
 /**

+ 3 - 2
src/main.js

@@ -3,8 +3,9 @@
 /**
  * @constructor
  * @param {Object=} wm
+ * @param {Object=} codegen
  */
-function v86(bus, wm)
+function v86(bus, wm, codegen)
 {
     /** @type {boolean} */
     this.running = false;
@@ -13,7 +14,7 @@ function v86(bus, wm)
     this.stopped = false;
 
     /** @type {CPU} */
-    this.cpu = new CPU(bus, wm);
+    this.cpu = new CPU(bus, wm, codegen);
 
     this.bus = bus;
     bus.register("cpu-init", this.init, this);

+ 18 - 9
src/memory.js

@@ -70,14 +70,14 @@ CPU.prototype.mmap_read32 = function(addr)
     var aligned_addr = addr >>> MMAP_BLOCK_BITS;
 
     return this.memory_map_read32[aligned_addr](addr);
-}
+};
 
 CPU.prototype.mmap_write32 = function(addr, value)
 {
     var aligned_addr = addr >>> MMAP_BLOCK_BITS;
 
     this.memory_map_write32[aligned_addr](addr, value);
-}
+};
 
 CPU.prototype.in_mapped_range = function(addr)
 {
@@ -223,12 +223,14 @@ CPU.prototype.write16 = function(addr, value)
 CPU.prototype.write_aligned16 = function(addr, value)
 {
     dbg_assert(addr >= 0 && addr < 0x80000000);
-    this.debug_write(addr << 1, 2, value);
+
+    let phys_addr = addr << 1;
+    this.debug_write(phys_addr, 2, value);
     if(USE_A20 && !this.a20_enabled[0]) addr &= A20_MASK16;
 
-    if(this.in_mapped_range(addr << 1))
+    if(this.in_mapped_range(phys_addr))
     {
-        this.mmap_write16(addr << 1, value);
+        this.mmap_write16(phys_addr, value);
     }
     else
     {
@@ -261,12 +263,14 @@ CPU.prototype.write32 = function(addr, value)
 CPU.prototype.write_aligned32 = function(addr, value)
 {
     dbg_assert(addr >= 0 && addr < 0x40000000);
-    this.debug_write(addr << 2, 4, value);
+
+    let phys_addr = addr << 2;
+    this.debug_write(phys_addr, 4, value);
     if(USE_A20 && !this.a20_enabled[0]) addr &= A20_MASK32;
 
-    if(this.in_mapped_range(addr << 2))
+    if(this.in_mapped_range(phys_addr))
     {
-        this.mmap_write32(addr << 2, value);
+        this.mmap_write32(phys_addr, value);
     }
     else
     {
@@ -280,7 +284,9 @@ CPU.prototype.write_aligned32 = function(addr, value)
  */
 CPU.prototype.write_blob = function(blob, offset)
 {
-    this.debug_write(offset, blob.length, 0)
+    this.debug_write(offset, blob.length, 0);
+    this.wm.funcs._jit_dirty_cache(offset, offset + blob.length);
+
     dbg_assert(blob && blob.length >= 0);
 
     this.mem8.set(blob, offset);
@@ -293,6 +299,9 @@ CPU.prototype.write_blob = function(blob, offset)
 CPU.prototype.write_blob32 = function(blob, offset)
 {
     dbg_assert(blob && blob.length);
+    let phys_addr = offset << 2;
+    this.wm.funcs._jit_dirty_cache(phys_addr, phys_addr + blob.length);
+
     this.debug_write(offset, blob.length << 2, 0);
     this.mem32s.set(blob, offset);
 };

+ 5 - 1
src/native/all.c

@@ -1,6 +1,5 @@
 #include <stdint.h>
 #include <math.h>
-#include <assert.h>
 #include <stdbool.h>
 
 extern void call_interrupt_vector(int32_t interrupt_nr, bool is_software_int, bool has_error_code, int32_t error_code);
@@ -11,6 +10,7 @@ extern double_t math_pow(double_t, double_t);
 #include "global_pointers.h"
 #include "log.c"
 #include "cpu.c"
+#include "profiler.c"
 #include "memory.c"
 #include "modrm.c"
 #include "misc_instr.c"
@@ -19,3 +19,7 @@ extern double_t math_pow(double_t, double_t);
 #include "instructions.c"
 #include "instructions_0f.c"
 #include "string.c"
+
+extern void jit_resolve_modrm16(int32_t);
+extern void jit_resolve_modrm32(int32_t);
+

+ 33 - 60
src/native/arith.c

@@ -249,51 +249,21 @@ int32_t imul_reg16(int32_t operand1, int32_t operand2)
     return result;
 }
 
-void do_mul32(uint32_t a, uint32_t b)
-{
-    uint32_t a00 = a & 0xFFFF;
-    uint32_t a16 = a >> 16;
-    uint32_t b00 = b & 0xFFFF;
-    int32_t b16 = b >> 16;
-    uint32_t low_result = a00 * b00;
-    uint32_t mid = (low_result >> 16) + (a16 * b00);
-    uint32_t high_result = mid >> 16;
-    mid = (mid & 0xFFFF) + (a00 * b16);
-    mul32_result[0] = (mid << 16) | low_result & 0xFFFF;
-    mul32_result[1] = ((mid >> 16) + (a16 * b16)) + high_result;
-}
-
-void do_imul32(int32_t a, int32_t b)
-{
-    bool is_neg = false;
-    if(a < 0) {
-        is_neg = true;
-        a = -a;
-    }
-    if(b < 0) {
-        is_neg = !is_neg;
-        b = -b;
-    }
-    do_mul32(a, b);
-    if(is_neg) {
-        mul32_result[0] = -mul32_result[0];
-        mul32_result[1] = ~mul32_result[1] + !mul32_result[0];
-    }
-}
-
 void mul32(int32_t source_operand)
 {
     int32_t dest_operand = reg32s[EAX];
 
-    do_mul32(dest_operand, source_operand);
+    uint64_t result = (uint64_t)(uint32_t)dest_operand * (uint32_t)source_operand;
+    int32_t result_low = result;
+    int32_t result_high = result >> 32;
 
-    reg32s[EAX] = mul32_result[0];
-    reg32s[EDX] = mul32_result[1];
+    reg32s[EAX] = result_low;
+    reg32s[EDX] = result_high;
 
-    *last_result = mul32_result[0];
+    *last_result = result_low;
     *last_op_size = OPSIZE_32;
 
-    if(mul32_result[1] == 0)
+    if(result_high == 0)
     {
         *flags &= ~1 & ~FLAG_OVERFLOW;
     }
@@ -307,16 +277,17 @@ void mul32(int32_t source_operand)
 void imul32(int32_t source_operand)
 {
     int32_t dest_operand = reg32s[EAX];
+    int64_t result = (int64_t)dest_operand * (int64_t)source_operand;
+    int32_t result_low = result;
+    int32_t result_high = result >> 32;
 
-    do_imul32(dest_operand, source_operand);
+    reg32s[EAX] = result_low;
+    reg32s[EDX] = result_high;
 
-    reg32s[EAX] = mul32_result[0];
-    reg32s[EDX] = mul32_result[1];
-
-    *last_result = mul32_result[0];
+    *last_result = result_low;
     *last_op_size = OPSIZE_32;
 
-    if(mul32_result[1] == (mul32_result[0] >> 31))
+    if(result_high == (result_low >> 31))
     {
         *flags &= ~1 & ~FLAG_OVERFLOW;
     }
@@ -329,12 +300,14 @@ void imul32(int32_t source_operand)
 
 int32_t imul_reg32(int32_t operand1, int32_t operand2)
 {
-    do_imul32(operand1, operand2);
+    int64_t result = (int64_t)operand1 * (int64_t)operand2;
+    int32_t result_low = result;
+    int32_t result_high = result >> 32;
 
-    *last_result = mul32_result[0];
+    *last_result = result_low;
     *last_op_size = OPSIZE_32;
 
-    if(mul32_result[1] == (mul32_result[0] >> 31))
+    if(result_high == (result_low >> 31))
     {
         *flags &= ~1 & ~FLAG_OVERFLOW;
     }
@@ -344,7 +317,7 @@ int32_t imul_reg32(int32_t operand1, int32_t operand2)
     }
     *flags_changed = FLAGS_ALL & ~1 & ~FLAG_OVERFLOW;
 
-    return mul32_result[0];
+    return result_low;
 }
 
 int32_t xadd8(int32_t source_operand, int32_t reg)
@@ -1011,7 +984,7 @@ int32_t sar8(int32_t dest_operand, int32_t count)
 
     if(count < 8)
     {
-        *last_result = dest_operand << 24 >> count + 24;
+        *last_result = dest_operand << 24 >> (count + 24);
         // of is zero
         *flags = (*flags & ~1 & ~FLAG_OVERFLOW) | (dest_operand >> (count - 1) & 1);
     }
@@ -1036,7 +1009,7 @@ int32_t sar16(int32_t dest_operand, int32_t count)
 
     if(count < 16)
     {
-        *last_result = dest_operand << 16 >> count + 16;
+        *last_result = dest_operand << 16 >> (count + 16);
         *flags = (*flags & ~1 & ~FLAG_OVERFLOW) | (dest_operand >> (count - 1) & 1);
     }
     else
@@ -1193,7 +1166,7 @@ int32_t btr_reg(int32_t bit_base, int32_t bit_offset)
 
 void bt_mem(int32_t virt_addr, int32_t bit_offset)
 {
-    int32_t bit_base = safe_read8(virt_addr + (bit_offset >> 3) | 0);
+    int32_t bit_base = safe_read8(virt_addr + (bit_offset >> 3));
     bit_offset &= 7;
 
     *flags = (*flags & ~1) | (bit_base >> bit_offset & 1);
@@ -1202,7 +1175,7 @@ void bt_mem(int32_t virt_addr, int32_t bit_offset)
 
 void btc_mem(int32_t virt_addr, int32_t bit_offset)
 {
-    int32_t phys_addr = translate_address_write(virt_addr + (bit_offset >> 3) | 0);
+    int32_t phys_addr = translate_address_write(virt_addr + (bit_offset >> 3));
     int32_t bit_base = read8(phys_addr);
 
     bit_offset &= 7;
@@ -1215,7 +1188,7 @@ void btc_mem(int32_t virt_addr, int32_t bit_offset)
 
 void btr_mem(int32_t virt_addr, int32_t bit_offset)
 {
-    int32_t phys_addr = translate_address_write(virt_addr + (bit_offset >> 3) | 0);
+    int32_t phys_addr = translate_address_write(virt_addr + (bit_offset >> 3));
     int32_t bit_base = read8(phys_addr);
 
     bit_offset &= 7;
@@ -1228,7 +1201,7 @@ void btr_mem(int32_t virt_addr, int32_t bit_offset)
 
 void bts_mem(int32_t virt_addr, int32_t bit_offset)
 {
-    int32_t phys_addr = translate_address_write(virt_addr + (bit_offset >> 3) | 0);
+    int32_t phys_addr = translate_address_write(virt_addr + (bit_offset >> 3));
     int32_t bit_base = read8(phys_addr);
 
     bit_offset &= 7;
@@ -1277,7 +1250,7 @@ int32_t bsf32(int32_t old, int32_t bit_base)
     {
         *flags &= ~FLAG_ZERO;
 
-        return *last_result = int_log2(((uint32_t) (-bit_base & bit_base)) >> 0);
+        return *last_result = int_log2(((uint32_t) (-bit_base & bit_base)));
     }
 }
 
@@ -1316,7 +1289,7 @@ int32_t bsr32(int32_t old, int32_t bit_base)
     else
     {
         *flags &= ~FLAG_ZERO;
-        return *last_result = int_log2(((uint32_t) bit_base) >> 0);
+        return *last_result = int_log2(((uint32_t) bit_base));
     }
 }
 
@@ -1343,7 +1316,7 @@ uint32_t saturate_sw_to_ub(uint32_t v)
 {
     dbg_assert((v & 0xFFFF0000) == 0);
 
-    uint32_t ret = v >> 0;
+    uint32_t ret = v;
     if (ret >= 0x8000) {
         ret = 0;
     }
@@ -1377,7 +1350,7 @@ int32_t saturate_sw_to_sb(int32_t v)
 
 uint32_t saturate_sd_to_sw(uint32_t v)
 {
-    uint32_t ret = v >> 0;
+    uint32_t ret = v;
 
     if (ret > 0xFFFF8000) {
         ret = ret & 0xFFFF;
@@ -1395,7 +1368,7 @@ uint32_t saturate_sd_to_sw(uint32_t v)
 
 uint32_t saturate_sd_to_sb(uint32_t v)
 {
-    uint32_t ret = v >> 0;
+    uint32_t ret = v;
 
     if (ret > 0xFFFFFF80) {
         ret = ret & 0xFF;
@@ -1413,7 +1386,7 @@ uint32_t saturate_sd_to_sb(uint32_t v)
 
 int32_t saturate_sd_to_ub(int32_t v)
 {
-    int32_t ret = v | 0;
+    int32_t ret = v;
 
     if (ret < 0) {
         ret = 0;
@@ -1425,7 +1398,7 @@ int32_t saturate_sd_to_ub(int32_t v)
 
 uint32_t saturate_ud_to_ub(uint32_t v)
 {
-    uint32_t ret = v >> 0;
+    uint32_t ret = v;
 
     if (ret > 0xFF) {
         ret = 0xFF;

+ 332 - 0
src/native/codegen/api.c

@@ -0,0 +1,332 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "cstring.h"
+#include "../const.h"
+#include "wasm_opcodes.h"
+#include "util.h"
+#include "codegen.h"
+
+// location in memory where we store the result of the computation for testing
+#define RESULT_LOC 1600
+
+extern bool is_asize_32(void);
+extern int32_t read_imm8();
+extern int32_t read_imm8s();
+extern int32_t read_imm16();
+extern int32_t read_imm32s();
+
+extern int32_t* const instruction_pointer;
+extern int32_t* const previous_ip;
+extern uint8_t* const reg8;
+extern uint16_t* const reg16;
+extern int8_t* const reg8s;
+extern int16_t* const reg16s;
+extern int32_t* const reg32s;
+
+static void jit_resolve_modrm32_(int32_t);
+static void jit_resolve_modrm16_(int32_t);
+
+// Emits code that adds the constant n to the 32-bit value stored at instruction_pointer:
+//   *instruction_pointer = *instruction_pointer + n
+// The store address is pushed first, then the new value is computed on top of it.
+void gen_increment_instruction_pointer(int32_t n)
+{
+    push_i32((int32_t)instruction_pointer); // store address of ip
+
+    load_i32((int32_t)instruction_pointer); // load ip
+    push_i32(n); // load value to add to it
+    add_i32();
+
+    store_i32(); // store it back in
+}
+
+// Emits code that copies the current instruction pointer into previous_ip:
+//   *previous_ip = *instruction_pointer
+void gen_set_previous_eip()
+{
+    push_i32((int32_t)previous_ip); // store address of previous ip
+    load_i32((int32_t)instruction_pointer); // load ip
+    store_i32(); // store it as previous ip
+}
+
+// Emits a call to the imported function named fn (fn_len characters) taking no arguments.
+// The import entry is created on first use (see get_fn_index).
+void gen_fn0(char* fn, uint8_t fn_len)
+{
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN0_TYPE_INDEX);
+    call_fn(fn_idx);
+}
+
+// Emits a call to the imported function named fn with the constant arg0 as its single argument.
+void gen_fn1(char* fn, uint8_t fn_len, int32_t arg0)
+{
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN1_TYPE_INDEX);
+    push_i32(arg0);
+    call_fn(fn_idx);
+}
+
+// Emits a call to the imported function named fn with the constants arg0 and arg1 as arguments,
+// pushed in that order.
+void gen_fn2(char* fn, uint8_t fn_len, int32_t arg0, int32_t arg1)
+{
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN2_TYPE_INDEX);
+    push_i32(arg0);
+    push_i32(arg1);
+    call_fn(fn_idx);
+}
+
+// Expands to the eight case labels that share the value n in every bit except bits 3..5
+// (all values of that 3-bit field map to the same case), followed by `work` and a break.
+#define MODRM_ENTRY(n, work)\
+    case (n) | 0 << 3:\
+    case (n) | 1 << 3:\
+    case (n) | 2 << 3:\
+    case (n) | 3 << 3:\
+    case (n) | 4 << 3:\
+    case (n) | 5 << 3:\
+    case (n) | 6 << 3:\
+    case (n) | 7 << 3:\
+        work; break;
+
+#define MODRM_ENTRY16_0(row, seg, reg1, reg2)\
+    MODRM_ENTRY(0x00 | row, gen_modrm_entry_0(seg, reg1, reg2, 0))\
+    MODRM_ENTRY(0x40 | row, gen_modrm_entry_0(seg, reg1, reg2, read_imm8s()))\
+    MODRM_ENTRY(0x80 | row, gen_modrm_entry_0(seg, reg1, reg2, read_imm16()))\
+
+#define MODRM_ENTRY16_1(row, seg, reg)\
+    MODRM_ENTRY(0x00 | row, gen_modrm_entry_1(seg, reg, 0))\
+    MODRM_ENTRY(0x40 | row, gen_modrm_entry_1(seg, reg, read_imm8s()))\
+    MODRM_ENTRY(0x80 | row, gen_modrm_entry_1(seg, reg, read_imm16()))\
+
+// Emits the address computation for a 16-bit modrm form with two registers.
+// fn_idx: index of the function that receives the masked offset
+// reg16_idx_1, reg16_idx_2: memory addresses the two 16-bit register values are loaded from
+// imm: displacement, baked into the generated code as a constant
+static void inline gen_modrm_entry_0(int32_t fn_idx, int32_t reg16_idx_1, int32_t reg16_idx_2, int32_t imm)
+{
+    // generates: fn( ( reg1 + reg2 + imm ) & 0xFFFF )
+    load_u16(reg16_idx_1);
+    load_u16(reg16_idx_2);
+    add_i32();
+
+    push_i32(imm);
+    add_i32();
+
+    push_i32(0xFFFF);
+    and_i32();
+
+    call_fn(fn_idx);
+}
+
+// Same as gen_modrm_entry_0, but for forms with a single register.
+static void gen_modrm_entry_1(int32_t fn_idx, int32_t reg16_idx, int32_t imm)
+{
+    // generates: fn ( ( reg + imm ) & 0xFFFF )
+    load_u16(reg16_idx);
+    push_i32(imm);
+    add_i32();
+
+    push_i32(0xFFFF);
+    and_i32();
+
+    call_fn(fn_idx);
+}
+
+// Emits the address computation for modrm_byte in 16-bit addressing mode. Any displacement is
+// read via read_imm8s()/read_imm16() while generating and emitted as a constant.
+// Only bytes below 0xC0 are handled; anything else hits the default branch and asserts.
+static void jit_resolve_modrm16_(int32_t modrm_byte)
+{
+    int32_t const ds = fn_get_seg_prefix_ds_idx;
+    int32_t const ss = fn_get_seg_prefix_ss_idx;
+
+    switch(modrm_byte)
+    {
+        // The following casts cause some weird issue with emscripten and cause
+        // a performance hit. XXX: look into this later.
+        MODRM_ENTRY16_0(0, ds, (int32_t)(reg16 + BX), (int32_t)(reg16 + SI))
+        MODRM_ENTRY16_0(1, ds, (int32_t)(reg16 + BX), (int32_t)(reg16 + DI))
+        MODRM_ENTRY16_0(2, ss, (int32_t)(reg16 + BP), (int32_t)(reg16 + SI))
+        MODRM_ENTRY16_0(3, ss, (int32_t)(reg16 + BP), (int32_t)(reg16 + DI))
+        MODRM_ENTRY16_1(4, ds, (int32_t)(reg16 + SI))
+        MODRM_ENTRY16_1(5, ds, (int32_t)(reg16 + DI))
+
+        // special case
+        // row 6 without displacement bits: the address is a plain 16-bit immediate (ds), no register
+        MODRM_ENTRY(0x00 | 6, call_fn_with_arg(ds, read_imm16()))
+        MODRM_ENTRY(0x40 | 6, gen_modrm_entry_1(ss, (int32_t)(reg16 + BP), read_imm8s()))
+        MODRM_ENTRY(0x80 | 6, gen_modrm_entry_1(ss, (int32_t)(reg16 + BP), read_imm16()))
+
+        MODRM_ENTRY16_1(7, ds, (int32_t)(reg16 + BX))
+
+        default:
+            assert(false);
+    }
+}
+
+// Emits code that resolves modrm_byte in 16-bit addressing mode and stores the resulting
+// address at RESULT_LOC (used for testing):
+//   *RESULT_LOC = modrm_resolve16(modrm_byte)
+void gen_resolve_modrm16(int32_t modrm_byte)
+{
+    // The immediate of i32.const is a signed LEB128, so use push_i32 (as gen_resolve_modrm32
+    // does) rather than the unsigned encoding
+    push_i32(RESULT_LOC);
+    jit_resolve_modrm16_(modrm_byte);
+    store_i32();
+}
+
+#define MODRM_ENTRY32_0(row, seg, reg)\
+    MODRM_ENTRY(0x00 | row, gen_modrm32_entry(seg, reg, 0))\
+    MODRM_ENTRY(0x40 | row, gen_modrm32_entry(seg, reg, read_imm8s()))\
+    MODRM_ENTRY(0x80 | row, gen_modrm32_entry(seg, reg, read_imm32s()))\
+
+// Emits the address computation for a 32-bit modrm form with one register.
+// fn_idx: index of the function that receives the offset
+// reg32s_idx: memory address the 32-bit register value is loaded from
+// imm: displacement, baked into the generated code as a constant
+static void gen_modrm32_entry(int32_t fn_idx, int32_t reg32s_idx, int32_t imm)
+{
+    // generates: fn ( reg + imm )
+    load_i32(reg32s_idx);
+    push_i32(imm);
+    add_i32();
+
+    call_fn(fn_idx);
+}
+
+// Reads the SIB byte via read_imm8() and emits code computing
+//   get_seg_prefix(seg) + base [+ (reg32s[m] << s)]
+// mod: true when the modrm byte carries a displacement; it only affects the r == 5 case below.
+// The displacement itself is not handled here (see modrm32_special_case_1/2).
+static void jit_resolve_sib(bool mod)
+{
+    uint8_t sib_byte = read_imm8();
+    uint8_t r = sib_byte & 7; // base register number (bits 0..2)
+    uint8_t m = sib_byte >> 3 & 7; // index register number (bits 3..5)
+
+    // exactly one of base_addr / base is set below, selected by base_is_mem_access
+    int32_t base_addr;
+    int32_t base;
+    uint8_t seg;
+    bool base_is_mem_access = true;
+
+    if(r == 4)
+    {
+        base_addr = (int32_t)(reg32s + ESP);
+        seg = SS;
+    }
+    else if(r == 5)
+    {
+        if(mod)
+        {
+            base_addr = (int32_t)(reg32s + EBP);
+            seg = SS;
+        }
+        else
+        {
+            // no base register: the base is a 32-bit immediate following the SIB byte
+            base = read_imm32s();
+            seg = DS;
+            base_is_mem_access = false;
+        }
+    }
+    else
+    {
+        base_addr = (int32_t)(reg32s + r);
+        seg = DS;
+    }
+
+    // generate: get_seg_prefix(seg) + base
+    // Where base is accessed from memory if base_is_mem_access or written as a constant otherwise
+
+    // We don't use push_i32 here since we know seg will fit in 1 byte anyways so no need to loop
+    cs_write_u8(OP_I32CONST);
+    cs_write_u8(seg);
+
+    call_fn(fn_get_seg_prefix_idx);
+
+    if(base_is_mem_access)
+    {
+        load_i32(base_addr);
+    }
+    else
+    {
+        push_i32(base);
+    }
+
+    add_i32();
+
+    // We now have to generate an offset value to add
+
+    if(m == 4)
+    {
+        // offset is 0, we don't need to add anything
+        return;
+    }
+
+    // Offset is reg32s[m] << s, where s is:
+
+    uint8_t s = sib_byte >> 6 & 3; // scale (bits 6..7)
+
+    load_i32((int32_t)(reg32s + m));
+    // We don't use push_u32 here either since s will fit in 1 byte
+    cs_write_u8(OP_I32CONST);
+    cs_write_u8(s);
+    shl_i32();
+
+    add_i32();
+}
+
+// SIB addressing followed by a sign-extended 8-bit displacement: emits sib + imm8s.
+// The SIB byte is consumed first, then the displacement.
+static void modrm32_special_case_1()
+{
+    jit_resolve_sib(true);
+    push_i32(read_imm8s());
+    add_i32();
+}
+
+// SIB addressing followed by a 32-bit displacement: emits sib + imm32s.
+static void modrm32_special_case_2()
+{
+    jit_resolve_sib(true);
+    push_i32(read_imm32s());
+    add_i32();
+}
+
+// Emits the address computation for modrm_byte in 32-bit addressing mode. Any displacement or
+// SIB byte is read via read_imm*() while generating.
+// Only bytes below 0xC0 are handled; anything else hits the default branch and asserts.
+static void jit_resolve_modrm32_(int32_t modrm_byte)
+{
+    int32_t const ds = fn_get_seg_prefix_ds_idx;
+    int32_t const ss = fn_get_seg_prefix_ss_idx;
+
+    switch(modrm_byte)
+    {
+        MODRM_ENTRY32_0(0, ds, (int32_t)(reg32s + EAX))
+        MODRM_ENTRY32_0(1, ds, (int32_t)(reg32s + ECX))
+        MODRM_ENTRY32_0(2, ds, (int32_t)(reg32s + EDX))
+        MODRM_ENTRY32_0(3, ds, (int32_t)(reg32s + EBX))
+
+        // special cases
+        // row 4: a SIB byte follows; row 5 without displacement bits: plain 32-bit immediate (ds)
+        MODRM_ENTRY(0x00 | 4, jit_resolve_sib(false))
+        MODRM_ENTRY(0x40 | 4, modrm32_special_case_1())
+        MODRM_ENTRY(0x80 | 4, modrm32_special_case_2())
+        MODRM_ENTRY(0x00 | 5, call_fn_with_arg(ds, read_imm32s()))
+        MODRM_ENTRY(0x40 | 5, gen_modrm32_entry(ss, (int32_t)(reg32s + EBP), read_imm8s()))
+        MODRM_ENTRY(0x80 | 5, gen_modrm32_entry(ss, (int32_t)(reg32s + EBP), read_imm32s()))
+
+        MODRM_ENTRY32_0(6, ds, (int32_t)(reg32s + ESI))
+        MODRM_ENTRY32_0(7, ds, (int32_t)(reg32s + EDI))
+
+        default:
+            assert(false);
+    }
+}
+
+// Emits code that resolves modrm_byte in 32-bit addressing mode and stores the resulting
+// address at RESULT_LOC (used for testing):
+//   *RESULT_LOC = modrm_resolve32(modrm_byte)
+void gen_resolve_modrm32(int32_t modrm_byte)
+{
+    push_i32(RESULT_LOC);
+    jit_resolve_modrm32_(modrm_byte);
+    store_i32();
+}
+
+#undef MODRM_ENTRY
+
+// Emits a call to the imported function named fn with the resolved modrm address as first
+// argument and the constant arg0 as second. The addressing mode (16 or 32 bit) is chosen at
+// generation time via is_asize_32().
+void gen_modrm_fn1(char* fn, uint8_t fn_len, int32_t modrm_byte, int32_t arg0)
+{
+    // generates: fn( modrm_resolve( modrm_byte ), arg0 )
+    if(is_asize_32())
+    {
+        jit_resolve_modrm32_(modrm_byte);
+    }
+    else
+    {
+        jit_resolve_modrm16_(modrm_byte);
+    }
+
+    push_i32(arg0);
+
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN2_RET_TYPE_INDEX);
+    call_fn(fn_idx);
+}
+
+// Emits a call to the imported function named fn with the resolved modrm address as its only
+// argument. The addressing mode is chosen at generation time via is_asize_32().
+void gen_modrm_fn0(char* fn, uint8_t fn_len, int32_t modrm_byte)
+{
+    // generates: fn( modrm_resolve( modrm_byte ) )
+    if(is_asize_32())
+    {
+        jit_resolve_modrm32_(modrm_byte);
+    }
+    else
+    {
+        jit_resolve_modrm16_(modrm_byte);
+    }
+
+    int32_t fn_idx = get_fn_index(fn, fn_len, FN1_RET_TYPE_INDEX);
+    call_fn(fn_idx);
+}
+

+ 331 - 0
src/native/codegen/codegen.h

@@ -0,0 +1,331 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "cstring.h"
+#include "../const.h"
+#include "wasm_opcodes.h"
+#include "util.h"
+
+// taken from chrome's buffer size limit on synchronous compilation
+#define TOTAL_SIZE_LIMIT (4 * 1024)
+
+// Memory layout
+// - First 1024 bytes (except for the nullptr) are for storing all sections except for the code
+// section
+// - After that, up to TOTAL_SIZE_LIMIT bytes can be used for storing the code section
+
+static uint8_t* const output = (uint8_t* const) 2048 + 0x100000 * 6;
+
+// pointer to next free byte slot in output buffer, incremented as we write along in the buffer
+static uint8_t* op_ptr = output;
+
+static uint8_t* const code_section = output + 1024;
+static uint8_t* cs_ptr = code_section;
+
+// JS can keep strings at this location for passing them to wasm
+//XXX: figure out a better location for this
+static uint8_t* const str_input = code_section - 32;
+
+// Writers come in two flavours: write_* appends to the output buffer (op_ptr), cs_write_*
+// appends to the separate code section buffer (cs_ptr). Neither checks for buffer overflow.
+
+// Appends one raw byte to the output buffer
+static void inline write_u8(uint8_t x)
+{
+    *op_ptr++ = x;
+}
+
+// Appends one raw byte to the code section buffer
+static void inline cs_write_u8(uint8_t x)
+{
+    *cs_ptr++ = x;
+}
+
+// Appends x as a signed LEB128 to the output buffer
+static void inline write_i32(int32_t x)
+{
+    op_ptr = _write_leb_i32(op_ptr, x);
+}
+
+// Appends x as a signed LEB128 to the code section buffer
+static void inline cs_write_i32(int32_t x)
+{
+    cs_ptr = _write_leb_i32(cs_ptr, x);
+}
+
+// Appends x as an unsigned LEB128 to the output buffer
+static void inline write_u32(uint32_t x)
+{
+    op_ptr = _write_leb_u32(op_ptr, x);
+}
+
+// Appends x as an unsigned LEB128 to the code section buffer
+static void inline cs_write_u32(uint32_t x)
+{
+    cs_ptr = _write_leb_u32(cs_ptr, x);
+}
+
+#define FN0_TYPE_INDEX 0
+#define FN1_TYPE_INDEX 1
+#define FN2_TYPE_INDEX 2
+#define FN0_RET_TYPE_INDEX 3
+#define FN1_RET_TYPE_INDEX 4
+#define FN2_RET_TYPE_INDEX 5
+
+// Writes the type section: six function signatures taking 0, 1 or 2 i32 arguments, each with and
+// without an i32 return value. The order must match the FN*_TYPE_INDEX constants.
+static void write_type_section()
+{
+    write_u8(SC_TYPE);
+
+    // placeholder for the section size, patched at the end of this function
+    uint8_t* ptr_section_size = op_ptr;
+    write_u8(0);
+
+    write_u8(6); // number of type descriptors
+
+    // FN0
+    write_u8(TYPE_FUNC);
+    write_u8(0); // no args
+    write_u8(0); // no return val
+
+    // FN1
+    write_u8(TYPE_FUNC);
+    write_u8(1);
+    write_u8(TYPE_I32);
+    write_u8(0);
+
+    // FN2
+    write_u8(TYPE_FUNC);
+    write_u8(2);
+    write_u8(TYPE_I32);
+    write_u8(TYPE_I32);
+    write_u8(0);
+
+    // FN0_RET
+    write_u8(TYPE_FUNC);
+    write_u8(0);
+    write_u8(1);
+    write_u8(TYPE_I32);
+
+    // FN1_RET
+    write_u8(TYPE_FUNC);
+    write_u8(1);
+    write_u8(TYPE_I32);
+    write_u8(1);
+    write_u8(TYPE_I32);
+
+    // FN2_RET
+    write_u8(TYPE_FUNC);
+    write_u8(2);
+    write_u8(TYPE_I32);
+    write_u8(TYPE_I32);
+    write_u8(1);
+    write_u8(TYPE_I32);
+
+    // number of bytes written after the size byte itself; stored as a single byte
+    *ptr_section_size = (op_ptr - 1) - ptr_section_size;
+}
+
+// Import table
+
+static uint8_t* ptr_import_count = (uint8_t*) 0;
+static uint8_t* ptr_import_entries = (uint8_t*) 0;
+static uint8_t* ptr_import_table_size = (uint8_t*) 0;
+
+// The import table size is written in leb encoding, which we can't read by simple dereferencing so
+// we store the actual value separately. This is needed since we reserve two bytes for the import
+// table size as it can exceed 127
+// Default value is one as the section starts with containing one byte for the import count
+static uint32_t import_table_size = 1;
+
+// Goes over the import block to find index of an import entry by function name
+// Returns -1 if not found
+// Walks the entries as laid out by write_import_entry: a 1-byte module name length, a 1-byte
+// module name, a 1-byte field name length, the name, a kind byte and a type index byte.
+static int32_t get_import_index(char* fn, uint8_t fn_len)
+{
+    uint8_t* offset = ptr_import_entries;
+    for(int32_t i = 0; i < *ptr_import_count; i++)
+    {
+        offset += 1; // skip length of module name
+        offset += 1; // skip module name itself
+        uint8_t len = *offset++;
+        char* name = (char*) offset;
+        // names are not null-terminated, so compare the lengths first
+        if (len == fn_len && strncmp(name, fn, fn_len) == 0)
+        {
+            return i;
+        }
+        offset += len; // skip the string
+        offset += 1; // skip import kind
+        offset += 1; // skip type index
+    }
+    return -1;
+}
+
+// Records the new import section size and rewrites the two reserved size bytes in the output
+static void set_import_table_size(uint16_t size)
+{
+    import_table_size = size;
+    write_fixed_leb16_to_ptr(ptr_import_table_size, size);
+}
+
+// Starts the import section: writes the section code, reserves two bytes for the section size
+// and one byte for the entry count, and remembers where each of them lives so they can be
+// updated as imports are added.
+static void write_import_section_preamble()
+{
+    write_u8(SC_IMPORT);
+
+    ptr_import_table_size = op_ptr; // store current pointer location to write into later on
+    write_u8(1 | 0b10000000); write_u8(0); // 1 in 2 byte leb
+
+    // same as above but for count of entries
+    ptr_import_count = op_ptr;
+    write_u8(0);
+
+    // the actual list of imports starts here
+    ptr_import_entries = op_ptr;
+}
+
+// Appends the memory import (module "e", field "m") to the import section and updates the
+// import count and section size.
+static void write_memory_import()
+{
+    write_u8(1);
+    write_u8('e');
+    write_u8(1);
+    write_u8('m');
+
+    write_u8(EXT_MEMORY);
+
+    write_u8(0); // memory flag, 0 for no maximum memory limit present
+    write_u32(256); // initial memory length of 256 pages, takes 2 bytes in leb128
+
+    *ptr_import_count += 1;
+    // one byte for each of the six write_u8 calls above, plus two for the leb128-encoded 256
+    set_import_table_size(import_table_size + 1 + 1 + 1 + 1 + 1 + 1 + 2);
+}
+
+// Appends a function import (module "e", field fn_name) with the given type index to the import
+// section and updates the import count and section size.
+// Returns the index of the new entry (the import count before the call).
+static uint8_t write_import_entry(char* fn_name, uint8_t fn_name_len, uint8_t type_index)
+{
+    write_u8(1); // length of module name
+    write_u8('e'); // module name
+    write_u8(fn_name_len);
+    for (uint8_t i = 0; i < fn_name_len; i++)
+    {
+        write_u8(fn_name[i]);
+    }
+    write_u8(EXT_FUNCTION);
+    write_u8(type_index);
+    *ptr_import_count += 1;
+
+    // module length + module name + name length + name + kind + type index
+    set_import_table_size(import_table_size + 1 + 1 + 1 + fn_name_len + 1 + 1);
+
+    return *ptr_import_count - 1;
+}
+
+// Writes the function section, declaring the single generated function with signature FN0
+static void write_function_section()
+{
+    write_u8(SC_FUNCTION);
+    write_u8(2); // length of this section
+    write_u8(1); // count of signature indices
+    write_u8(FN0_TYPE_INDEX); // we export one function which is nullary
+}
+
+// Writes the export section, exporting the generated function under the name "f".
+// Must run after the memory import has been written, see the index computation below.
+static void write_export_section()
+{
+    write_u8(SC_EXPORT);
+    write_u8(1 + 1 + 1 + 1 + 1); // size of this section
+    write_u8(1); // count of table: just one function exported
+
+    write_u8(1); // length of exported function name
+    write_u8('f'); // function name
+    write_u8(EXT_FUNCTION);
+
+    // index of the exported function
+    // function space starts with imports. index of last import is import count - 1
+    // the last import however is a memory, so we subtract one from that
+    write_u8(*ptr_import_count - 1);
+}
+
+// Returns the import index of the function named fn, adding an import entry with the given
+// type index if none exists yet. An existing entry is matched by name only; its type index is
+// not compared against type_index.
+static int32_t get_fn_index(char* fn, uint8_t fn_len, uint8_t type_index)
+{
+    int32_t fn_idx = get_import_index(fn, fn_len);
+    if (fn_idx == -1)
+    {
+        return write_import_entry(fn, fn_len, type_index);
+    }
+    return fn_idx;
+}
+
+static uint8_t const fn_get_seg_prefix_ds_idx = 0;
+static uint8_t const fn_get_seg_prefix_ss_idx = 1;
+static uint8_t const fn_get_seg_prefix_idx = 2;
+
+static uint8_t* op_ptr_reset_location;
+static uint32_t import_table_size_reset_value;
+static uint32_t initial_import_count;
+
+// Writes the parts of the module that are the same for every generated function (header, type
+// section, start of the import section and the three get_seg_prefix* imports) and records the
+// resulting state so that gen_reset can return to it.
+void gen_init()
+{
+    // wasm magic header
+    write_u8(0); write_u8('a'); write_u8('s'); write_u8('m');
+
+    // wasm version in leb128, 4 bytes
+    write_u8(WASM_VERSION); write_u8(0); write_u8(0); write_u8(0);
+
+    write_type_section();
+    write_import_section_preamble();
+
+    // add initial imports
+    // their indices are hard-coded in the fn_get_seg_prefix*_idx constants, hence the asserts
+    uint8_t _fn_get_seg_prefix_ds_idx = write_import_entry("get_seg_prefix_ds", 17, FN1_RET_TYPE_INDEX);
+    assert(_fn_get_seg_prefix_ds_idx == fn_get_seg_prefix_ds_idx);
+    uint8_t _fn_get_seg_prefix_ss_idx = write_import_entry("get_seg_prefix_ss", 17, FN1_RET_TYPE_INDEX);
+    assert(_fn_get_seg_prefix_ss_idx == fn_get_seg_prefix_ss_idx);
+    uint8_t _fn_get_seg_prefix_idx = write_import_entry("get_seg_prefix", 14, FN1_RET_TYPE_INDEX);
+    assert(_fn_get_seg_prefix_idx == fn_get_seg_prefix_idx);
+
+    // store state of current pointers etc. so we can reset them later
+    op_ptr_reset_location = op_ptr;
+    initial_import_count = *ptr_import_count;
+    import_table_size_reset_value = import_table_size;
+}
+
+// Discards everything written since gen_init: rewinds both write pointers and restores the
+// import count and the tracked import section size. The size bytes in the output buffer are not
+// rewritten here; they are updated by the next set_import_table_size call.
+void gen_reset()
+{
+    op_ptr = op_ptr_reset_location;
+    cs_ptr = code_section;
+    *ptr_import_count = initial_import_count;
+    import_table_size = import_table_size_reset_value;
+}
+
+// Appends everything written to the code section buffer so far to the output buffer
+static void copy_code_section()
+{
+    uint8_t* offset = code_section;
+    while (offset < cs_ptr)
+    {
+        write_u8(*offset++);
+    }
+}
+
+// Completes the module: writes the memory import, the function and export sections and the code
+// section containing the single generated function body.
+// Returns the address one past the last byte written to the output buffer.
+uintptr_t gen_finish()
+{
+    write_memory_import();
+    write_function_section();
+    write_export_section();
+
+    uint8_t* ptr_code_section_size = (uint8_t*) 0; // initialized below
+    uint8_t* ptr_fn_body_size = (uint8_t*) 0; // this as well
+
+    // write code section preamble
+    write_u8(SC_CODE);
+    ptr_code_section_size = op_ptr; // we will write to this location later
+    write_u8(0); write_u8(0); // write temp val for now using 2 bytes
+
+    write_u8(1); // number of function bodies: just 1
+
+    // same as above but for body size of the function
+    ptr_fn_body_size = op_ptr;
+    write_u8(0); write_u8(0);
+
+    write_u8(0); // count of locals, none
+
+    copy_code_section();
+
+    // write code section epilogue
+    write_u8(OP_END);
+
+    // write the actual sizes to the pointer locations stored above. We subtract 1 from the actual
+    // value because the ptr itself points to two bytes
+    write_fixed_leb16_to_ptr(ptr_fn_body_size, ((op_ptr - 1) - ptr_fn_body_size) - 1);
+    write_fixed_leb16_to_ptr(ptr_code_section_size, ((op_ptr - 1) - ptr_code_section_size) - 1);
+
+    return (uintptr_t) op_ptr;
+}
+
+// Returns the current write position in the output buffer
+uintptr_t gen_get_final_offset()
+{
+    return (uintptr_t) op_ptr;
+}
+

+ 132 - 0
src/native/codegen/cstring.h

@@ -0,0 +1,132 @@
+// everything here is copied from musl
+
+#ifndef _CSTRING_H
+#define _CSTRING_H
+
+#include <stdint.h>
+#include <limits.h>
+
+// from strncmp.c
+
+// Compares at most n characters of _l and _r as unsigned chars, stopping early at a null byte
+// in either string. Returns zero if they are equal over that range, otherwise the difference of
+// the first differing pair.
+static int strncmp(const char *_l, const char *_r, size_t n)
+{
+    const unsigned char *l=(void *)_l, *r=(void *)_r;
+    // n is decremented up front so that the loop stops ON the n-th character rather than after it
+    if (!n--) return 0;
+    for (; *l && *r && n && *l == *r ; l++, r++, n--);
+    return *l - *r;
+}
+
+// from memset.c
+
+static void *memset(void *dest, int c, size_t n)
+{
+    unsigned char *s = dest;
+    size_t k;
+
+    /* Fill head and tail with minimal branching. Each
+     * conditional ensures that all the subsequently used
+     * offsets are well-defined and in the dest region. */
+
+    if (!n) return dest;
+    s[0] = s[n-1] = c;
+    if (n <= 2) return dest;
+    s[1] = s[n-2] = c;
+    s[2] = s[n-3] = c;
+    if (n <= 6) return dest;
+    s[3] = s[n-4] = c;
+    if (n <= 8) return dest;
+
+    /* Advance pointer to align it at a 4-byte boundary,
+     * and truncate n to a multiple of 4. The previous code
+     * already took care of any head/tail that get cut off
+     * by the alignment. */
+
+    k = -(uintptr_t)s & 3;
+    s += k;
+    n -= k;
+    n &= -4;
+
+#ifdef __GNUC__
+    typedef uint32_t __attribute__((__may_alias__)) u32;
+    typedef uint64_t __attribute__((__may_alias__)) u64;
+
+    u32 c32 = ((u32)-1)/255 * (unsigned char)c;
+
+    /* In preparation to copy 32 bytes at a time, aligned on
+     * an 8-byte boundary, fill head/tail up to 28 bytes each.
+     * As in the initial byte-based head/tail fill, each
+     * conditional below ensures that the subsequent offsets
+     * are valid (e.g. !(n<=24) implies n>=28). */
+
+    *(u32 *)(s+0) = c32;
+    *(u32 *)(s+n-4) = c32;
+    if (n <= 8) return dest;
+    *(u32 *)(s+4) = c32;
+    *(u32 *)(s+8) = c32;
+    *(u32 *)(s+n-12) = c32;
+    *(u32 *)(s+n-8) = c32;
+    if (n <= 24) return dest;
+    *(u32 *)(s+12) = c32;
+    *(u32 *)(s+16) = c32;
+    *(u32 *)(s+20) = c32;
+    *(u32 *)(s+24) = c32;
+    *(u32 *)(s+n-28) = c32;
+    *(u32 *)(s+n-24) = c32;
+    *(u32 *)(s+n-20) = c32;
+    *(u32 *)(s+n-16) = c32;
+
+    /* Align to a multiple of 8 so we can fill 64 bits at a time,
+     * and avoid writing the same bytes twice as much as is
+     * practical without introducing additional branching. */
+
+    k = 24 + ((uintptr_t)s & 4);
+    s += k;
+    n -= k;
+
+    /* If this loop is reached, 28 tail bytes have already been
+     * filled, so any remainder when n drops below 32 can be
+     * safely ignored. */
+
+    u64 c64 = c32 | ((u64)c32 << 32);
+    for (; n >= 32; n-=32, s+=32) {
+        *(u64 *)(s+0) = c64;
+        *(u64 *)(s+8) = c64;
+        *(u64 *)(s+16) = c64;
+        *(u64 *)(s+24) = c64;
+    }
+#else
+    /* Pure C fallback with no aliasing violations. */
+    for (; n; n--, s++) *s = c;
+#endif
+
+    return dest;
+}
+
+// from stpncpy.c: the copy loop is musl's __stpncpy, wrapped so that it returns the start of
+// the destination as strncpy must (stpncpy itself returns the end of the copied string)
+
+#define ALIGN (sizeof(size_t)-1)
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+
+// Copies at most n characters of s to d and fills the remainder of the n bytes with zeros.
+// As with the standard strncpy, d is NOT null-terminated when s has n or more characters.
+// Returns d.
+static char *strncpy(char *restrict d, const char *restrict s, size_t n)
+{
+    char *const dest = d; // d is advanced below, keep the start for the return value
+    size_t *wd;
+    const size_t *ws;
+
+    // word-at-a-time copy is only possible when both pointers share the same alignment
+    if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
+        for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
+        if (!n || !*s) goto tail;
+        wd=(void *)d; ws=(const void *)s;
+        for (; n>=sizeof(size_t) && !HASZERO(*ws);
+               n-=sizeof(size_t), ws++, wd++) *wd = *ws;
+        d=(void *)wd; s=(const void *)ws;
+    }
+    for (; n && (*d=*s); n--, s++, d++);
+tail:
+    // zero-fill whatever is left of the n bytes
+    memset(d, 0, n);
+    return dest;
+}
+
+#endif

+ 135 - 0
src/native/codegen/util.h

@@ -0,0 +1,135 @@
+#ifndef _WASM_UTIL_H
+#define _WASM_UTIL_H
+
+#include<stdint.h>
+
+// Writes v at ptr as an unsigned LEB128: seven bits per byte, least significant group first,
+// with the top bit set on every byte except the last.
+// Returns the address one past the last byte written.
+static uint8_t* _write_leb_u32(uint8_t* ptr, uint32_t v)
+{
+    for (;;)
+    {
+        uint8_t low7 = v & 0x7F;
+        v >>= 7;
+        if (v == 0)
+        {
+            // nothing left, final byte keeps its top bit clear
+            *ptr++ = low7;
+            return ptr;
+        }
+        *ptr++ = low7 | 0x80; // more groups follow
+    }
+}
+
+// Writes v at ptr as a signed LEB128: seven bits per byte, least significant group first, with
+// the top bit set on every byte except the last. Encoding stops once the remaining value is
+// pure sign extension of bit 6 of the byte just produced.
+// Returns the address one past the last byte written.
+static uint8_t* _write_leb_i32(uint8_t* ptr, int32_t v)
+{
+    // See the following:
+    // https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
+    // http://llvm.org/doxygen/LEB128_8h_source.html#l00048
+
+    // Work on the two's complement bit pattern: right-shifting a negative signed value is
+    // implementation-defined and left-shifting one (as in `~0 << n`) is undefined behaviour
+    bool negative = v < 0;
+    uint32_t bits = (uint32_t) v;
+    // the top seven bits vacated by each logical shift are refilled with the sign
+    uint32_t const sign_fill = negative ? 0xFE000000u : 0;
+
+    bool more = true;
+    while (more)
+    {
+        uint8_t byte = bits & 0b1111111; // get last 7 bits
+        bits = (bits >> 7) | sign_fill; // shift them away from the value, extending the sign
+        bool sign_bit = (byte & (1 << 6)) != 0;
+        if ((bits == 0 && !sign_bit) || (bits == UINT32_MAX && sign_bit))
+        {
+            more = false;
+        }
+        else
+        {
+            byte |= 0b10000000; // turn on MSB
+        }
+        *ptr++ = byte;
+    }
+    return ptr;
+}
+
+// Writes x at ptr as an unsigned LEB128 that always occupies exactly two bytes: the low seven
+// bits with the continuation bit set, then the remaining bits. This lets a size field be
+// reserved up front and patched later. Only values below 1 << 14 can be represented this way.
+static void inline write_fixed_leb16_to_ptr(uint8_t* ptr, uint16_t x)
+{
+    ptr[0] = (x & 0b1111111) | 0b10000000;
+    ptr[1] = x >> 7;
+}
+
+static void cs_write_u8(uint8_t);
+static void cs_write_u32(uint32_t);
+static void cs_write_i32(int32_t);
+
+// Emits i32.const v (the immediate is written as a signed LEB128)
+static void inline push_i32(int32_t v)
+{
+    cs_write_u8(OP_I32CONST);
+    cs_write_i32(v);
+}
+
+// Emits i32.const with the bit pattern of v.
+// The immediate of i32.const is a signed LEB128, so v is reinterpreted as int32_t before
+// encoding; an unsigned encoding would be decoded as a negative number whenever bit 6 of its
+// last byte is set (e.g. 64 would be read back as -64).
+static void inline push_u32(uint32_t v)
+{
+    cs_write_u8(OP_I32CONST);
+    cs_write_i32((int32_t) v);
+}
+
+// Emits code that pushes the zero-extended 16-bit value stored at the constant address addr:
+//   i32.const addr; i32.load16_u
+static void inline load_u16(uint32_t addr)
+{
+    cs_write_u8(OP_I32CONST);
+    // the immediate of i32.const is a signed LEB128; an unsigned encoding of addr would be
+    // decoded as a negative number whenever bit 6 of its last byte is set
+    cs_write_i32((int32_t) addr);
+    cs_write_u8(OP_I32LOAD16U);
+    cs_write_u8(MEM_IMM_ALIGNMENT);
+    cs_write_u8(MEM_IMM_OFFSET);
+}
+
+// Emits code that pushes the 32-bit value stored at the constant address addr:
+//   i32.const addr; i32.load
+static void inline load_i32(uint32_t addr)
+{
+    cs_write_u8(OP_I32CONST);
+    // the immediate of i32.const is a signed LEB128; an unsigned encoding of addr would be
+    // decoded as a negative number whenever bit 6 of its last byte is set
+    cs_write_i32((int32_t) addr);
+    cs_write_u8(OP_I32LOAD);
+    cs_write_u8(MEM_IMM_ALIGNMENT);
+    cs_write_u8(MEM_IMM_OFFSET);
+}
+
+// Emits i32.store; the address and the value must already have been pushed, in that order
+static void inline store_i32()
+{
+    cs_write_u8(OP_I32STORE);
+    cs_write_u8(MEM_IMM_ALIGNMENT);
+    cs_write_u8(MEM_IMM_OFFSET);
+}
+
+// Emits i32.add
+static void inline add_i32()
+{
+    cs_write_u8(OP_I32ADD);
+}
+
+// Emits i32.and
+static void inline and_i32()
+{
+    cs_write_u8(OP_I32AND);
+}
+
+// Emits i32.shl
+static void inline shl_i32()
+{
+    cs_write_u8(OP_I32SHL);
+}
+
+// Emits a call to the function with index fn_idx; its arguments must already have been pushed.
+static void inline call_fn(uint8_t fn_idx)
+{
+    cs_write_u8(OP_CALL);
+    // the function index is an unsigned LEB128: a raw byte is only a valid encoding below 128
+    cs_write_u32(fn_idx);
+}
+
+// Emits a call to the function with index fn_idx, passing the constant arg0 as its argument
+static void inline call_fn_with_arg(uint8_t fn_idx, int32_t arg0)
+{
+    push_i32(arg0);
+    call_fn(fn_idx);
+}
+
+#endif

+ 212 - 0
src/native/codegen/wasm_opcodes.h

@@ -0,0 +1,212 @@
+// https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md#high-level-structure
+#define WASM_VERSION 0x1
+
+// Section codes
+#define SC_TYPE     1
+#define SC_IMPORT   2
+#define SC_FUNCTION 3
+#define SC_TABLE    4
+#define SC_MEMORY   5
+#define SC_GLOBAL   6
+#define SC_EXPORT   7
+#define SC_START    8
+#define SC_ELEMENT  9
+#define SC_CODE     10
+#define SC_DATA     11
+
+// https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md#language-types
+#define TYPE_I32        0x7f
+#define TYPE_I64        0x7e
+#define TYPE_F32        0x7d
+#define TYPE_F64        0x7c
+#define TYPE_ANYFUNC    0x70
+#define TYPE_FUNC       0x60
+#define TYPE_BLOCK_TYPE 0x40
+
+// https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md#external_kind
+#define EXT_FUNCTION    0
+#define EXT_TABLE       1
+#define EXT_MEMORY      2
+#define EXT_GLOBAL      3
+
+// Taken from wasm2ast's source code and modified with vim magic
+#define OP_UNREACHABLE          0x00
+#define OP_NOP                  0x01
+#define OP_BLOCK                0x02
+#define OP_LOOP                 0x03
+#define OP_IF                   0x04
+#define OP_ELSE                 0x05
+#define OP_TRY                  0x06
+#define OP_CATCH                0x07
+#define OP_THROW                0x08
+#define OP_RETHROW              0x09
+#define OP_CATCHALL             0x0a
+#define OP_END                  0x0b
+#define OP_BR                   0x0c
+#define OP_BRIF                 0x0d
+#define OP_BRTABLE              0x0e
+#define OP_RETURN               0x0f
+#define OP_CALL                 0x10
+#define OP_CALLINDIRECT         0x11
+#define OP_DROP                 0x1a
+#define OP_SELECT               0x1b
+#define OP_GETLOCAL             0x20
+#define OP_SETLOCAL             0x21
+#define OP_TEELOCAL             0x22
+#define OP_GETGLOBAL            0x23
+#define OP_SETGLOBAL            0x24
+#define OP_I32LOAD              0x28
+#define OP_I64LOAD              0x29
+#define OP_F32LOAD              0x2a
+#define OP_F64LOAD              0x2b
+#define OP_I32LOAD8S            0x2c
+#define OP_I32LOAD8U            0x2d
+#define OP_I32LOAD16S           0x2e
+#define OP_I32LOAD16U           0x2f
+#define OP_I64LOAD8S            0x30
+#define OP_I64LOAD8U            0x31
+#define OP_I64LOAD16S           0x32
+#define OP_I64LOAD16U           0x33
+#define OP_I64LOAD32S           0x34
+#define OP_I64LOAD32U           0x35
+#define OP_I32STORE             0x36
+#define OP_I64STORE             0x37
+#define OP_F32STORE             0x38
+#define OP_F64STORE             0x39
+#define OP_I32STORE8            0x3a
+#define OP_I32STORE16           0x3b
+#define OP_I64STORE8            0x3c
+#define OP_I64STORE16           0x3d
+#define OP_I64STORE32           0x3e
+#define OP_CURRENTMEMORY        0x3f
+#define OP_GROWMEMORY           0x40
+#define OP_I32CONST             0x41
+#define OP_I64CONST             0x42
+#define OP_F32CONST             0x43
+#define OP_F64CONST             0x44
+#define OP_I32EQZ               0x45
+#define OP_I32EQ                0x46
+#define OP_I32NE                0x47
+#define OP_I32LTS               0x48
+#define OP_I32LTU               0x49
+#define OP_I32GTS               0x4a
+#define OP_I32GTU               0x4b
+#define OP_I32LES               0x4c
+#define OP_I32LEU               0x4d
+#define OP_I32GES               0x4e
+#define OP_I32GEU               0x4f
+#define OP_I64EQZ               0x50
+#define OP_I64EQ                0x51
+#define OP_I64NE                0x52
+#define OP_I64LTS               0x53
+#define OP_I64LTU               0x54
+#define OP_I64GTS               0x55
+#define OP_I64GTU               0x56
+#define OP_I64LES               0x57
+#define OP_I64LEU               0x58
+#define OP_I64GES               0x59
+#define OP_I64GEU               0x5a
+#define OP_F32EQ                0x5b
+#define OP_F32NE                0x5c
+#define OP_F32LT                0x5d
+#define OP_F32GT                0x5e
+#define OP_F32LE                0x5f
+#define OP_F32GE                0x60
+#define OP_F64EQ                0x61
+#define OP_F64NE                0x62
+#define OP_F64LT                0x63
+#define OP_F64GT                0x64
+#define OP_F64LE                0x65
+#define OP_F64GE                0x66
+#define OP_I32CLZ               0x67
+#define OP_I32CTZ               0x68
+#define OP_I32POPCNT            0x69
+#define OP_I32ADD               0x6a
+#define OP_I32SUB               0x6b
+#define OP_I32MUL               0x6c
+#define OP_I32DIVS              0x6d
+#define OP_I32DIVU              0x6e
+#define OP_I32REMS              0x6f
+#define OP_I32REMU              0x70
+#define OP_I32AND               0x71
+#define OP_I32OR                0x72
+#define OP_I32XOR               0x73
+#define OP_I32SHL               0x74
+#define OP_I32SHRS              0x75
+#define OP_I32SHRU              0x76
+#define OP_I32ROTL              0x77
+#define OP_I32ROTR              0x78
+#define OP_I64CLZ               0x79
+#define OP_I64CTZ               0x7a
+#define OP_I64POPCNT            0x7b
+#define OP_I64ADD               0x7c
+#define OP_I64SUB               0x7d
+#define OP_I64MUL               0x7e
+#define OP_I64DIVS              0x7f
+#define OP_I64DIVU              0x80
+#define OP_I64REMS              0x81
+#define OP_I64REMU              0x82
+#define OP_I64AND               0x83
+#define OP_I64OR                0x84
+#define OP_I64XOR               0x85
+#define OP_I64SHL               0x86
+#define OP_I64SHRS              0x87
+#define OP_I64SHRU              0x88
+#define OP_I64ROTL              0x89
+#define OP_I64ROTR              0x8a
+#define OP_F32ABS               0x8b
+#define OP_F32NEG               0x8c
+#define OP_F32CEIL              0x8d
+#define OP_F32FLOOR             0x8e
+#define OP_F32TRUNC             0x8f
+#define OP_F32NEAREST           0x90
+#define OP_F32SQRT              0x91
+#define OP_F32ADD               0x92
+#define OP_F32SUB               0x93
+#define OP_F32MUL               0x94
+#define OP_F32DIV               0x95
+#define OP_F32MIN               0x96
+#define OP_F32MAX               0x97
+#define OP_F32COPYSIGN          0x98
+#define OP_F64ABS               0x99
+#define OP_F64NEG               0x9a
+#define OP_F64CEIL              0x9b
+#define OP_F64FLOOR             0x9c
+#define OP_F64TRUNC             0x9d
+#define OP_F64NEAREST           0x9e
+#define OP_F64SQRT              0x9f
+#define OP_F64ADD               0xa0
+#define OP_F64SUB               0xa1
+#define OP_F64MUL               0xa2
+#define OP_F64DIV               0xa3
+#define OP_F64MIN               0xa4
+#define OP_F64MAX               0xa5
+#define OP_F64COPYSIGN          0xa6
+#define OP_I32WRAPI64           0xa7
+#define OP_I32TRUNCSF32         0xa8
+#define OP_I32TRUNCUF32         0xa9
+#define OP_I32TRUNCSF64         0xaa
+#define OP_I32TRUNCUF64         0xab
+#define OP_I64EXTENDSI32        0xac
+#define OP_I64EXTENDUI32        0xad
+#define OP_I64TRUNCSF32         0xae
+#define OP_I64TRUNCUF32         0xaf
+#define OP_I64TRUNCSF64         0xb0
+#define OP_I64TRUNCUF64         0xb1
+#define OP_F32CONVERTSI32       0xb2
+#define OP_F32CONVERTUI32       0xb3
+#define OP_F32CONVERTSI64       0xb4
+#define OP_F32CONVERTUI64       0xb5
+#define OP_F32DEMOTEF64         0xb6
+#define OP_F64CONVERTSI32       0xb7
+#define OP_F64CONVERTUI32       0xb8
+#define OP_F64CONVERTSI64       0xb9
+#define OP_F64CONVERTUI64       0xba
+#define OP_F64PROMOTEF32        0xbb
+#define OP_I32REINTERPRETF32    0xbc
+#define OP_I64REINTERPRETF64    0xbd
+#define OP_F32REINTERPRETI32    0xbe
+#define OP_F64REINTERPRETI64    0xbf
+
+#define MEM_IMM_ALIGNMENT 0
+#define MEM_IMM_OFFSET 0

+ 14 - 0
src/native/const.h

@@ -161,3 +161,17 @@
 #define USE_A20 false
 
 #define MXCSR_MASK (0xFFFF & ~(1 << 6))
+
+// Mask used to map physical address to index in cache array
+#define JIT_PHYS_MASK 0xFFFF
+
+#define CACHE_LEN 0x10000
+#define HASH_PRIME 6151
+#define JIT_THRESHOLD 10000
+// XXX: Consider making this the same as page size (12) during perf testing
+#define DIRTY_ARR_SHIFT 16
+#define MAX_INSTR_LEN 15
+#define MAX_BLOCK_LENGTH ((1 << DIRTY_ARR_SHIFT) - MAX_INSTR_LEN)
+
+#define ENABLE_JIT 0
+#define ENABLE_PROFILER 0

+ 295 - 328
src/native/cpu.c

@@ -6,10 +6,7 @@
 
 #include "const.h"
 #include "global_pointers.h"
-
-int32_t read_e8_partial_branch() {
-    return reg8[*modrm_byte << 2 & 0xC | *modrm_byte >> 2 & 1];
-}
+#include "profiler.h"
 
 // like memcpy, but only efficient for large (approximately 10k) sizes
 // See memcpy in https://github.com/kripken/emscripten/blob/master/src/library.js
@@ -21,14 +18,19 @@ int32_t translate_address_write(int32_t);
 int32_t read8(uint32_t);
 int32_t read16(uint32_t);
 int32_t read32s(uint32_t);
+int64_t read64s(uint32_t);
+int32_t read_aligned16(uint32_t addr);
 int32_t virt_boundary_read16(int32_t, int32_t);
 int32_t virt_boundary_read32s(int32_t, int32_t);
-void write8(uint32_t, uint8_t);
-void write16(uint32_t, uint16_t);
+void write8(uint32_t, int32_t);
+void write16(uint32_t, int32_t);
 void write32(uint32_t, int32_t);
+void write64(uint32_t, int64_t);
 void virt_boundary_write16(int32_t, int32_t, int32_t);
 void virt_boundary_write32(int32_t, int32_t, int32_t);
 
+bool cpu_exception_hook(int32_t);
+
 bool in_mapped_range(uint32_t);
 
 void trigger_gp(int32_t);
@@ -48,9 +50,25 @@ void fxrstor(uint32_t);
 
 int32_t do_page_translation(int32_t, bool, bool);
 
-void diverged() {}
-void branch_taken() {}
-void branch_not_taken() {}
+// Record that control flow just changed by setting the jit_jump flag to 1.
+// cycle_internal() tests this flag when deciding whether the current address
+// starts a basic block.
+void after_jump()
+{
+    jit_jump = 1;
+}
+
+// Hook invoked when execution diverges; it only forwards to after_jump().
+void diverged() {
+    after_jump();
+}
+
+// Hook invoked when a conditional branch is taken; it only forwards to after_jump().
+void branch_taken()
+{
+    after_jump();
+}
+
+// Hook invoked when a conditional branch is not taken. It still calls
+// after_jump(), so the jit_jump flag is set on both outcomes of a branch.
+void branch_not_taken()
+{
+    after_jump();
+}
+
 
 int32_t getcf(void);
 int32_t getpf(void);
@@ -60,6 +78,9 @@ int32_t getsf(void);
 int32_t getof(void);
 
 
+double_t microtick();
+
+
 int32_t get_eflags()
 {
     return (*flags & ~FLAGS_ALL) | !!getcf() | !!getpf() << 2 | !!getaf() << 4 |
@@ -148,17 +169,6 @@ int32_t read_imm32s()
     return data32;
 }
 
-int32_t read_op0F() { return read_imm8(); }
-int32_t read_sib() { return read_imm8(); }
-int32_t read_op8() { return read_imm8(); }
-int32_t read_op8s() { return read_imm8s(); }
-int32_t read_op16() { return read_imm16(); }
-int32_t read_op32s() { return read_imm32s(); }
-int32_t read_disp8() { return read_imm8(); }
-int32_t read_disp8s() { return read_imm8s(); }
-int32_t read_disp16() { return read_imm16(); }
-int32_t read_disp32s() { return read_imm32s(); }
-
 bool is_osize_32()
 {
     return *is_32 != ((*prefixes & PREFIX_MASK_OPSIZE) == PREFIX_MASK_OPSIZE);
@@ -169,11 +179,6 @@ bool is_asize_32()
     return *is_32 != ((*prefixes & PREFIX_MASK_ADDRSIZE) == PREFIX_MASK_ADDRSIZE);
 }
 
-void read_modrm_byte()
-{
-    *modrm_byte = read_imm8();
-}
-
 int32_t get_seg(int32_t segment)
 {
     assert(segment >= 0 && segment < 8);
@@ -184,6 +189,7 @@ int32_t get_seg(int32_t segment)
         if(segment_is_null[segment])
         {
             assert(segment != CS && segment != SS);
+            dbg_log("#gp: Access null segment");
             trigger_gp(0);
         }
     }
@@ -232,129 +238,117 @@ static int32_t modrm_resolve(int32_t modrm_byte)
     }
 }
 
-void set_e8(int32_t value)
-{
-    int32_t modrm_byte_ = *modrm_byte;
-    if(modrm_byte_ < 0xC0) {
-        int32_t addr = modrm_resolve(modrm_byte_);
-        safe_write8(addr, value);
-    } else {
-        reg8[modrm_byte_ << 2 & 0xC | modrm_byte_ >> 2 & 1] = value;
-    }
-}
-
-void set_e16(int32_t value)
-{
-    int32_t modrm_byte_ = *modrm_byte;
-    if(modrm_byte_ < 0xC0) {
-        int32_t addr = modrm_resolve(modrm_byte_);
-        safe_write16(addr, value);
-    } else {
-        reg16[modrm_byte_ << 1 & 14] = value;
-    }
-}
-
-void set_e32(int32_t value)
-{
-    int32_t modrm_byte_ = *modrm_byte;
-    if(modrm_byte_ < 0xC0) {
-        int32_t addr = modrm_resolve(modrm_byte_);
-        safe_write32(addr, value);
-    } else {
-        reg32s[modrm_byte_ & 7] = value;
-    }
-}
-
-int32_t read_g8()
-{
-    return reg8[*modrm_byte >> 1 & 0xC | *modrm_byte >> 5 & 1];
-}
-
-int32_t read_g16()
+uint32_t jit_hot_hash(uint32_t addr)
 {
-    return reg16[*modrm_byte >> 2 & 14];
+    return addr % HASH_PRIME;
 }
 
-int32_t read_g16s()
-{
-    return reg16s[*modrm_byte >> 2 & 14];
-}
-
-int32_t read_g32s()
-{
-    return reg32s[*modrm_byte >> 3 & 7];
-}
-
-void write_g8(int32_t value)
-{
-    reg8[*modrm_byte >> 1 & 0xC | *modrm_byte >> 5 & 1] = value;
-}
-
-void write_g16(int32_t value)
-{
-    reg16[*modrm_byte >> 2 & 14] = value;
-}
-
-void write_g32(int32_t value)
+void cycle_internal()
 {
-    reg32s[*modrm_byte >> 3 & 7] = value;
-}
+#if ENABLE_JIT
+/* Use JIT mode */
+    int32_t eip = *instruction_pointer;
+    // Save previous_ip now since translate_address_read might trigger a page-fault
+    *previous_ip = *instruction_pointer;
 
-int32_t read_e8()
-{
-    if(*modrm_byte < 0xC0)
-    {
-        return safe_read8(modrm_resolve(*modrm_byte));
-    }
-    else
+    if((eip & ~0xFFF) ^ *last_virt_eip)
     {
-        return reg8[*modrm_byte << 2 & 0xC | *modrm_byte >> 2 & 1];
+        *eip_phys = translate_address_read(eip) ^ eip;
+        *last_virt_eip = eip & ~0xFFF;
     }
-}
 
-int32_t read_e8s()
-{
-    return read_e8() << 24 >> 24;
-}
+    uint32_t phys_addr = *eip_phys ^ eip;
+    assert(!in_mapped_range(phys_addr));
 
-int32_t read_e16()
-{
-    if(*modrm_byte < 0xC0)
-    {
-        return safe_read16(modrm_resolve(*modrm_byte));
-    }
-    else
-    {
-        return reg16[*modrm_byte << 1 & 14];
-    }
-}
+    struct code_cache *entry = &jit_cache_arr[phys_addr & JIT_PHYS_MASK];
 
-int32_t read_e16s()
-{
-    return read_e16() << 16 >> 16;
-}
-
-int32_t read_e32s()
-{
-    if(*modrm_byte < 0xC0)
+    if(entry->group_status == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT] &&
+       entry->start_addr == phys_addr)
     {
-        return safe_read32s(modrm_resolve(*modrm_byte));
+        // XXX: With the code-generation, we need to figure out how we
+        // would call the function from the other module here; likely
+        // through a handler in JS. For now:
+
+        // Confirm that cache is not dirtied (through page-writes,
+        // mode switch, or just cache eviction)
+        for(int32_t i = 0; i < entry->len; i++)
+        {
+            *previous_ip = *instruction_pointer;
+            int32_t opcode = read_imm8();
+            phys_addr = *eip_phys ^ (*instruction_pointer - 1);
+            assert(opcode == entry->opcode[i]);
+            run_instruction(entry->opcode[i] | !!*is_32 << 8);
+            (*timestamp_counter)++;
+        }
+        // XXX: Try to find an assert to detect self-modifying code
+        // JIT compiled self-modifying basic blocks may trigger this assert
+        // assert(entry->group_status != group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT]);
+        *cache_hit = *cache_hit + 1;
+    }
+    // A jump just occurred indicating the start of a basic block + the
+    // address is hot; let's JIT compile it
+    else if(jit_jump == 1 && ++hot_code_addresses[jit_hot_hash(phys_addr)] > JIT_THRESHOLD)
+    {
+        // Minimize collision based thrashing
+        hot_code_addresses[jit_hot_hash(phys_addr)] = 0;
+        jit_jump = 0;
+        entry->len = 0;
+        entry->start_addr = phys_addr;
+        entry->end_addr = phys_addr + 1;
+        jit_cache_arr[phys_addr & JIT_PHYS_MASK] = *entry;
+
+        *cache_compile = *cache_compile + 1;
+
+        // XXX: Artificial limit allows jit_dirty_cache to be
+        // simplified by only dirtying 2 entries based on a mask
+        // (instead of all possible entries)
+        while(jit_jump == 0 && entry->len < 100 &&
+              (entry->end_addr - entry->start_addr) < MAX_BLOCK_LENGTH)
+        {
+            *previous_ip = *instruction_pointer;
+            int32_t opcode = read_imm8();
+            // XXX: Currently only includes opcode of final jmp, not operands
+            entry->end_addr = *eip_phys ^ *instruction_pointer;
+            entry->opcode[entry->len] = opcode;
+            entry->len++;
+
+            // XXX: Generate the instruction instead of running it
+            // XXX: If it's a jmp instruction, make sure
+            // generate_instruction sets jit_jump=1 and end_addr is set correctly
+            run_instruction(opcode | !!*is_32 << 8);
+            (*timestamp_counter)++;
+        }
+        jit_jump = 0;
+        // When the hot instruction is a jmp (backwards),
+        // leave its group_status unupdated, thereby invalidating it
+        if (entry->end_addr > entry->start_addr)
+        {
+            entry->group_status = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
+        }
     }
+    // Regular un-hot code execution
     else
     {
-        return reg32s[*modrm_byte & 7];
+        jit_jump = 0;
+        int32_t opcode = read_imm8();
+        run_instruction(opcode | !!*is_32 << 8);
+        (*timestamp_counter)++;
     }
-}
 
-void cycle_internal()
-{
+#else
+/* Use non-JIT mode */
     previous_ip[0] = instruction_pointer[0];
 
     (*timestamp_counter)++;
 
     int32_t opcode = read_imm8();
 
+#if DEBUG
+    logop(previous_ip[0], opcode);
+#endif
+
     run_instruction(opcode | !!*is_32 << 8);
+#endif
 }
 
 static void run_prefix_instruction()
@@ -385,12 +379,26 @@ void do_many_cycles_unsafe()
 
+// Raise interrupt `interrupt_nr`: call the interrupt vector (passing false/0 for
+// the error-code arguments) and then unwind through throw_cpu_exception().
+// In DEBUG builds cpu_exception_hook() is consulted first; when it returns true
+// the interrupt vector is NOT called and only throw_cpu_exception() runs.
 void raise_exception(int32_t interrupt_nr)
 {
+#if DEBUG
+    if(cpu_exception_hook(interrupt_nr))
+    {
+        throw_cpu_exception();
+        // NOTE(review): presumably unreachable if throw_cpu_exception() never returns — confirm
+        return;
+    }
+#endif
     call_interrupt_vector(interrupt_nr, false, false, 0);
     throw_cpu_exception();
 }
 
+// Raise interrupt `interrupt_nr` and pass `error_code` to call_interrupt_vector()
+// (third argument true), then unwind through throw_cpu_exception().
+// In DEBUG builds cpu_exception_hook() is consulted first; when it returns true
+// the interrupt vector is NOT called and only throw_cpu_exception() runs.
 void raise_exception_with_code(int32_t interrupt_nr, int32_t error_code)
 {
+#if DEBUG
+    if(cpu_exception_hook(interrupt_nr))
+    {
+        throw_cpu_exception();
+        // NOTE(review): presumably unreachable if throw_cpu_exception() never returns — confirm
+        return;
+    }
+#endif
     call_interrupt_vector(interrupt_nr, false, true, error_code);
     throw_cpu_exception();
 }
@@ -401,12 +409,104 @@ void trigger_de()
     raise_exception(0);
 }
 
+// Raise exception 6 (logged as "#ud"): log and dump a debug trace, rewind the
+// instruction pointer to the start of the current instruction (previous_ip),
+// then raise the exception without an error code.
+void trigger_ud()
+{
+    dbg_log("#ud");
+    dbg_trace();
+    *instruction_pointer = *previous_ip;
+    raise_exception(6);
+}
+
+// Raise exception 7: rewind the instruction pointer to previous_ip, then raise
+// the exception without an error code.
+void trigger_nm()
+{
+    *instruction_pointer = *previous_ip;
+    raise_exception(7);
+}
+
 void trigger_gp(int32_t code)
 {
     *instruction_pointer = *previous_ip;
     raise_exception_with_code(13, code);
 }
 
+// Read a 16-bit value whose two bytes live at separately supplied addresses.
+// `low` must be the last byte of a page (offset 0xFFF) and `high` the first
+// byte of a page (offset 0); both are checked in debug builds.
+// Returns the byte at `low` in bits 0-7 and the byte at `high` in bits 8-15.
+int32_t virt_boundary_read16(int32_t low, int32_t high)
+{
+    dbg_assert((low & 0xFFF) == 0xFFF);
+    dbg_assert((high & 0xFFF) == 0);
+
+    int32_t lo_byte = read8(low);
+    int32_t hi_byte = read8(high);
+
+    return lo_byte | hi_byte << 8;
+}
+
+// Read a 32-bit value that straddles a page boundary.
+// `low` is the address of the first byte (page offset 0xFFD..0xFFF) and `high`
+// the address of the last byte (its page offset equals low's offset + 3, mod
+// 0x1000); both are checked in debug builds.
+// The outer bytes are read with read8(); how the middle two bytes are fetched
+// depends on where the boundary falls.
+int32_t virt_boundary_read32s(int32_t low, int32_t high)
+{
+    dbg_assert((low & 0xFFF) >= 0xFFD);
+    dbg_assert((high - 3 & 0xFFF) == (low & 0xFFF));
+
+    int32_t mid = 0;
+
+    switch(low & 3)
+    {
+        case 3:
+            // 0xFFF: both middle bytes are at the start of the high page
+            mid = read_aligned16((high - 2) >> 1);
+            break;
+
+        case 1:
+            // 0xFFD: both middle bytes are at the end of the low page
+            mid = read_aligned16((low + 1) >> 1);
+            break;
+
+        default:
+            // 0xFFE: the middle bytes themselves straddle the boundary
+            mid = virt_boundary_read16(low + 1, high - 1);
+            break;
+    }
+
+    return read8(low) | mid << 8 | read8(high) << 24;
+}
+
+// Write the low 16 bits of `value` as two separate byte writes: bits 0-7 to
+// `low` and bits 8-15 to `high`. `low` must be the last byte of a page (offset
+// 0xFFF) and `high` the first byte of a page (offset 0); checked in debug builds.
+void virt_boundary_write16(int32_t low, int32_t high, int32_t value)
+{
+    dbg_assert((low & 0xFFF) == 0xFFF);
+    dbg_assert((high & 0xFFF) == 0);
+
+    write8(low, value);
+    write8(high, value >> 8);
+}
+
+// Write a 32-bit `value` that straddles a page boundary, one byte at a time.
+// `low` is the address of the first byte (page offset 0xFFD..0xFFF) and `high`
+// the address of the last byte (its page offset equals low's offset + 3, mod
+// 0x1000); both are checked in debug builds.
+// Bytes are written in ascending significance: low, two middle bytes, high.
+void virt_boundary_write32(int32_t low, int32_t high, int32_t value)
+{
+    dbg_assert((low & 0xFFF) >= 0xFFD);
+    dbg_assert((high - 3 & 0xFFF) == (low & 0xFFF));
+
+    // Addresses of the second and third byte, derived from whichever of
+    // low/high lies on the same page as that byte.
+    int32_t second;
+    int32_t third;
+
+    switch(low & 3)
+    {
+        case 3:
+            // 0xFFF
+            second = high - 2;
+            third = high - 1;
+            break;
+
+        case 1:
+            // 0xFFD
+            second = low + 1;
+            third = low + 2;
+            break;
+
+        default:
+            // 0xFFE
+            second = low + 1;
+            third = high - 1;
+            break;
+    }
+
+    write8(low, value);
+    write8(second, value >> 8);
+    write8(third, value >> 16);
+    write8(high, value >> 24);
+}
+
 int32_t safe_read8(int32_t addr)
 {
     return read8(translate_address_read(addr));
@@ -439,7 +539,7 @@ int32_t safe_read32s(int32_t addr)
 union reg64 safe_read64s(int32_t addr)
 {
     union reg64 x;
-    if((addr & 0xFFF) >= 0xFF9)
+    if((addr & 0xFFF) > (0x1000 - 8))
     {
         x.u32[0] = safe_read32s(addr);
         x.u32[1] = safe_read32s(addr + 4);
@@ -447,8 +547,7 @@ union reg64 safe_read64s(int32_t addr)
     else
     {
         int32_t addr_phys = translate_address_read(addr);
-        x.u32[0] = read32s(addr_phys);
-        x.u32[1] = read32s(addr_phys + 4);
+        x.u64[0] = read64s(addr_phys);
     }
     return x;
 }
@@ -456,7 +555,7 @@ union reg64 safe_read64s(int32_t addr)
 union reg128 safe_read128s(int32_t addr)
 {
     union reg128 x;
-    if((addr & 0xFFF) >= 0xFF1)
+    if((addr & 0xFFF) > (0x1000 - 16))
     {
         x.u32[0] = safe_read32s(addr);
         x.u32[1] = safe_read32s(addr + 4);
@@ -466,32 +565,12 @@ union reg128 safe_read128s(int32_t addr)
     else
     {
         int32_t addr_phys = translate_address_read(addr);
-        x.u32[0] = read32s(addr_phys);
-        x.u32[1] = read32s(addr_phys + 4);
-        x.u32[2] = read32s(addr_phys + 8);
-        x.u32[3] = read32s(addr_phys + 12);
+        x.u64[0] = read64s(addr_phys);
+        x.u64[1] = read64s(addr_phys + 8);
     }
     return x;
 }
 
-void safe_write64(int32_t addr, int32_t low, int32_t high)
-{
-    // TODO: Optimize
-    writable_or_pagefault(addr, 8);
-    safe_write32(addr, low);
-    safe_write32(addr + 4, high);
-}
-
-void safe_write128(int32_t addr, union reg128 value)
-{
-    // TODO: Optimize
-    writable_or_pagefault(addr, 16);
-    safe_write32(addr, value.u32[0]);
-    safe_write32(addr + 4, value.u32[1]);
-    safe_write32(addr + 8, value.u32[2]);
-    safe_write32(addr + 12, value.u32[3]);
-}
-
 void safe_write8(int32_t addr, int32_t value)
 {
     write8(translate_address_write(addr), value);
@@ -515,7 +594,7 @@ void safe_write32(int32_t addr, int32_t value)
 {
     int32_t phys_low = translate_address_write(addr);
 
-    if((addr & 0xFFF) >= 0xFFD)
+    if((addr & 0xFFF) > (0x1000 - 4))
     {
         virt_boundary_write32(phys_low, translate_address_write(addr + 3 & ~3) | (addr + 3) & 3, value);
     }
@@ -525,245 +604,127 @@ void safe_write32(int32_t addr, int32_t value)
     }
 }
 
-int32_t read_write_e8()
+// Write the 64-bit `value` to virtual address `addr`.
+// When the 8 bytes fit inside one page, the address is translated once and
+// written with write64(). Otherwise the value is split into two 32-bit writes.
+void safe_write64(int32_t addr, int64_t value)
 {
-    if(*modrm_byte < 0xC0)
+    if((addr & 0xFFF) > (0x1000 - 8))
     {
-        int32_t virt_addr = modrm_resolve(*modrm_byte);
-        *phys_addr = translate_address_write(virt_addr);
-        return read8(*phys_addr);
+        // Check the whole range up front (as the previous implementation did) so a
+        // fault on the second page cannot leave the first dword already written.
+        writable_or_pagefault(addr, 8);
+        safe_write32(addr, value);
+        safe_write32(addr + 4, value >> 32);
     }
     else
     {
-        return reg8[*modrm_byte << 2 & 0xC | *modrm_byte >> 2 & 1];
+        int32_t phys = translate_address_write(addr);
+        write64(phys, value);
     }
 }
 
-void write_e8(int32_t value)
+// Write the 128-bit `value` to virtual address `addr`.
+// When the 16 bytes fit inside one page, the address is translated once and
+// written as two 64-bit halves. Otherwise each half goes through safe_write64().
+void safe_write128(int32_t addr, union reg128 value)
 {
-    if(*modrm_byte < 0xC0)
+    if((addr & 0xFFF) > (0x1000 - 16))
     {
-        write8(*phys_addr, value);
+        // Check the whole range up front (as the previous implementation did) so a
+        // fault on the second page cannot leave part of the value already written.
+        writable_or_pagefault(addr, 16);
+        safe_write64(addr, value.u64[0]);
+        safe_write64(addr + 8, value.u64[1]);
     }
     else
     {
-        reg8[*modrm_byte << 2 & 0xC | *modrm_byte >> 2 & 1] = value;
+        int32_t phys = translate_address_write(addr);
+        write64(phys, value.u64[0]);
+        write64(phys + 8, value.u64[1]);
     }
 }
 
-int32_t read_write_e16()
+static int32_t get_reg8_index(int32_t index) { return index << 2 & 0xC | index >> 2 & 1; }
+
+static int32_t read_reg8(int32_t index)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        int32_t virt_addr = modrm_resolve(*modrm_byte);
-        *phys_addr = translate_address_write(virt_addr);
-        if((virt_addr & 0xFFF) == 0xFFF)
-        {
-            *phys_addr_high = translate_address_write(virt_addr + 1);
-            dbg_assert(*phys_addr_high);
-            return virt_boundary_read16(*phys_addr, *phys_addr_high);
-        }
-        else
-        {
-            *phys_addr_high = 0;
-            return read16(*phys_addr);
-        }
-    }
-    else
-    {
-        return reg16[*modrm_byte << 1 & 14];
-    }
+    return reg8[get_reg8_index(index)];
 }
 
-void write_e16(int32_t value)
+static void write_reg8(int32_t index, int32_t value)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        if(*phys_addr_high)
-        {
-            virt_boundary_write16(*phys_addr, *phys_addr_high, value);
-        }
-        else
-        {
-            write16(*phys_addr, value);
-        }
-    }
-    else
-    {
-        reg16[*modrm_byte << 1 & 14] = value;
-    }
+    reg8[get_reg8_index(index)] = value;
 }
 
-int32_t read_write_e32()
+static int32_t get_reg16_index(int32_t index) { return index << 1; }
+
+static int32_t read_reg16(int32_t index)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        int32_t virt_addr = modrm_resolve(*modrm_byte);
-        *phys_addr = translate_address_write(virt_addr);
-        if((virt_addr & 0xFFF) >= 0xFFD)
-        {
-            *phys_addr_high = translate_address_write(virt_addr + 3 & ~3) | (virt_addr + 3) & 3;
-            dbg_assert(*phys_addr_high);
-            return virt_boundary_read32s(*phys_addr, *phys_addr_high);
-        }
-        else
-        {
-            *phys_addr_high = 0;
-            return read32s(*phys_addr);
-        }
-    }
-    else
-    {
-        return reg32s[*modrm_byte & 7];
-    }
+    return reg16[get_reg16_index(index)];
 }
 
-void write_e32(int32_t value)
+static void write_reg16(int32_t index, int32_t value)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        if(*phys_addr_high)
-        {
-            virt_boundary_write32(*phys_addr, *phys_addr_high, value);
-        }
-        else
-        {
-            write32(*phys_addr, value);
-        }
-    }
-    else
-    {
-        reg32s[*modrm_byte & 7] = value;
-    }
+    reg16[get_reg16_index(index)] = value;
 }
 
-union reg64 read_mmx64s()
+
+static int32_t read_reg32(int32_t index)
 {
-    union reg64 x;
-    int32_t i = (*modrm_byte >> 3 & 7) << 1;
-    x.u32[0] = reg_mmx32s[i];
-    x.u32[1] = reg_mmx32s[i | 1];
-    return x;
+    return reg32s[index];
 }
 
-
-int32_t read_mmx_mem32s()
+static void write_reg32(int32_t index, int32_t value)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        return safe_read32s(modrm_resolve(*modrm_byte));
-    }
-    else
-    {
-        // Returning lower dword of qword
-        return reg_mmx32s[(*modrm_byte & 7) << 1];
-    }
+    reg32s[index] = value;
 }
 
-union reg64 read_mmx_mem64s()
+static void write_reg_osize(int32_t index, int32_t value)
 {
-    if(*modrm_byte < 0xC0)
+    assert(index >= 0 && index < 8);
+
+    if(is_osize_32())
     {
-        return safe_read64s(modrm_resolve(*modrm_byte));
+        write_reg32(index, value);
     }
     else
     {
-        union reg64 x;
-        int32_t i = (*modrm_byte & 7) << 1;
-        x.u32[0] = reg_mmx32s[i];
-        x.u32[1] = reg_mmx32s[i | 1];
-
-        return x;
+        write_reg16(index, value & 0xFFFF);
     }
 }
 
-void write_mmx64s(int32_t low, int32_t high)
+int32_t read_mmx32s(int32_t r)
 {
-    int32_t offset = (*modrm_byte >> 3 & 7) << 1;
-    reg_mmx32s[offset] = low;
-    reg_mmx32s[offset | 1] = high;
+    return reg_mmx[r].u32[0];
 }
 
-void write_mmx_mem64s(int32_t low, int32_t high)
+union reg64 read_mmx64s(int32_t r)
 {
-    if(*modrm_byte < 0xC0) {
-        int32_t addr = modrm_resolve(*modrm_byte);
-        safe_write64(addr, low, high);
-    } else {
-        int32_t offset = (*modrm_byte & 7) << 1;
-        reg_mmx32s[offset] = low;
-        reg_mmx32s[offset | 1] = high;
-    }
+    return reg_mmx[r];
 }
 
-union reg64 read_xmm64s()
+void write_mmx64(int32_t r, int32_t low, int32_t high)
 {
-    union reg64 x;
-    int32_t i = (*modrm_byte >> 3 & 7) << 2;
-    x.u32[0] = reg_xmm32s[i];
-    x.u32[1] = reg_xmm32s[i | 1];
-    return x;
+    reg_mmx[r].u32[0] = low;
+    reg_mmx[r].u32[1] = high;
 }
 
-union reg128 read_xmm128s()
+union reg64 read_xmm64s(int32_t r)
 {
-    union reg128 x;
-    int32_t i = (*modrm_byte >> 3 & 7) << 2;
-    x.u32[0] = reg_xmm32s[i];
-    x.u32[1] = reg_xmm32s[i | 1];
-    x.u32[2] = reg_xmm32s[i | 2];
-    x.u32[3] = reg_xmm32s[i | 3];
+    union reg64 x;
+    x.u64[0] = reg_xmm[r].u64[0];
     return x;
 }
 
-union reg64 read_xmm_mem64s()
+union reg128 read_xmm128s(int32_t r)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        return safe_read64s(modrm_resolve(*modrm_byte));
-    }
-    else
-    {
-        union reg64 x;
-        int32_t i = (*modrm_byte & 7) << 2;
-        x.u32[0] = reg_xmm32s[i];
-        x.u32[1] = reg_xmm32s[i | 1];
-        return x;
-    }
+    return reg_xmm[r];
 }
 
-union reg128 read_xmm_mem128s()
+void write_xmm64(int32_t r, union reg64 data)
 {
-    if(*modrm_byte < 0xC0)
-    {
-        return safe_read128s(modrm_resolve(*modrm_byte));
-    }
-    else
-    {
-        union reg128 x;
-        int32_t i = (*modrm_byte & 7) << 2;
-        x.u32[0] = reg_xmm32s[i];
-        x.u32[1] = reg_xmm32s[i | 1];
-        x.u32[2] = reg_xmm32s[i | 2];
-        x.u32[3] = reg_xmm32s[i | 3];
-        return x;
-    }
+    reg_xmm[r].u64[0] = data.u64[0];
 }
 
-void write_xmm64(int32_t d0, int32_t d1)
+void write_xmm128(int32_t r, int32_t i0, int32_t i1, int32_t i2, int32_t i3)
 {
-    int32_t i = (*modrm_byte >> 3 & 7) << 2;
-    reg_xmm32s[i] = d0;
-    reg_xmm32s[i | 1] = d1;
+    union reg128 x = { .u32 = { i0, i1, i2, i3 } };
+    reg_xmm[r] = x;
 }
 
-void write_xmm128s(int32_t d0, int32_t d1, int32_t d2, int32_t d3)
+void write_xmm_reg128(int32_t r, union reg128 data)
 {
-    int32_t i = (*modrm_byte >> 3 & 7) << 2;
-    reg_xmm32s[i] = d0;
-    reg_xmm32s[i | 1] = d1;
-    reg_xmm32s[i | 2] = d2;
-    reg_xmm32s[i | 3] = d3;
+    reg_xmm[r].u64[0] = data.u64[0];
+    reg_xmm[r].u64[1] = data.u64[1];
 }
 
 void clear_tlb()
@@ -791,11 +752,11 @@ int32_t read_moffs()
 {
     if(is_asize_32())
     {
-        return get_seg_prefix(DS) + read_op32s();
+        return read_imm32s();
     }
     else
     {
-        return get_seg_prefix(DS) + read_op16();
+        return read_imm16();
     }
 }
 
@@ -874,3 +835,9 @@ int32_t decr_ecx_asize()
     return is_asize_32() ? --reg32s[ECX] : --reg16[CX];
 }
 
+// Compute the current time-stamp counter value: the microtick() reading minus
+// the stored tsc_offset, scaled by TSC_RATE and converted to an integer.
+uint64_t read_tsc()
+{
+    double_t elapsed = microtick() - tsc_offset[0]; // XXX: float
+    double_t ticks = elapsed * TSC_RATE;
+    return ticks;
+}

+ 2 - 2
src/native/fpu.c

@@ -94,8 +94,8 @@ void fpu_store_m80(uint32_t addr, double_t n)
 double_t fpu_load_m80(uint32_t addr)
 {
     int32_t exponent = safe_read16(addr + 8);
-    uint32_t low = ((uint32_t)(safe_read32s(addr))) >> 0;
-    uint32_t high = ((uint32_t)(safe_read32s(addr + 4))) >> 0;
+    uint32_t low = ((uint32_t)(safe_read32s(addr)));
+    uint32_t high = ((uint32_t)(safe_read32s(addr + 4)));
 
     int32_t sign = exponent >> 15;
     exponent &= ~0x8000;

+ 15 - 24
src/native/global_pointers.h

@@ -1,6 +1,12 @@
 #ifndef _GLOBAL_POINTERS_H
 #define _GLOBAL_POINTERS_H
 
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include "const.h"
+#include "shared.h"
+
 uint8_t* const reg8 = (uint8_t* const) 4;
 uint16_t* const reg16 = (uint16_t* const) 4;
 int8_t* const reg8s = (int8_t* const) 4;
@@ -14,8 +20,7 @@ int32_t* const last_add_result = (int32_t* const) 524;
 int32_t* const last_result = (int32_t* const) 528;
 int32_t* const flags_changed = (int32_t* const) 532;
 int32_t* const flags = (int32_t* const) 536;
-int32_t* const modrm_byte = (int32_t* const) 540;
-int32_t* const mul32_result = (int32_t* const) 544; // length 8
+// gap 16
 
 bool* const a20_enabled = (bool* const) 552;
 int32_t* const instruction_pointer = (int32_t* const) 556;
@@ -40,7 +45,7 @@ uint8_t* const prefixes = (uint8_t* const) 648;
 int32_t* const tsc_offset = (int32_t* const) 652;
 int32_t* const phys_addr = (int32_t* const) 656;
 int32_t* const phys_addr_high = (int32_t* const) 660;
-int32_t* const timestamp_counter = (int32_t* const) 664;
+uint32_t* const timestamp_counter = (uint32_t* const) 664;
 
 uint16_t* const sreg = (uint16_t* const) 668;
 int32_t* const dreg = (int32_t* const) 684; // length 32
@@ -59,32 +64,17 @@ bool* const paging = (bool* const) 820;
 
 int32_t* const mxcsr = (int32_t* const) 824;
 
-union reg128 {
-    uint8_t   u8[16];
-    uint16_t  u16[8];
-    uint32_t  u32[4];
-};
-
-union reg64 {
-    int8_t   s8[8];
-    int16_t  s16[4];
-    int32_t  s32[2];
-    uint8_t   u8[8];
-    uint16_t  u16[4];
-    uint32_t  u32[2];
-    double   f64[1];
-};
-
-int32_t* const reg_xmm32s = (int32_t* const) 828; // length 128
 union reg128* const reg_xmm = (union reg128* const) 828; // length 128
 
 uint8_t* const tlb_info = (uint8_t* const) 2048; // length 0x100000
 uint8_t* const tlb_info_global = (uint8_t* const) (2048 + 0x100000); // length 0x100000
 int32_t* const tlb_data = (int32_t* const) (2048 + 0x100000 + 0x100000); // length 0x100000*4
 
-uint8_t* const mem8 = (uint8_t* const) (2048 + 0x100000 * 6);
-uint16_t* const mem16 = (uint16_t* const) (2048 + 0x100000 * 6);
-int32_t* const mem32s = (int32_t* const) (2048 + 0x100000 * 6);
+uint8_t* const codegen_buffers = (uint8_t* const) (2048 + 0x100000 * 6); // length 2048
+
+uint8_t* const mem8 = (uint8_t* const) (2048 + 0x100000 * 6 + 2048);
+uint16_t* const mem16 = (uint16_t* const) (2048 + 0x100000 * 6 + 2048);
+int32_t* const mem32s = (int32_t* const) (2048 + 0x100000 * 6 + 2048);
 
 float_t* const fpu_float32 = (float_t* const) 956;
 uint8_t* const fpu_float32_byte = (uint8_t* const) 956;
@@ -107,7 +97,8 @@ double_t* const fpu_st = (double_t* const) 968;
 uint8_t* const fpu_st8 = (uint8_t* const) 968;
 int32_t* const fpu_st32 = (int32_t* const) 968;
 
-int32_t* const reg_mmx32s = (int32_t* const) 1064; // length 64
 union reg64* const reg_mmx = (union reg64* const) 1064; // length 64
 
+uint32_t* const cache_hit = (uint32_t* const) 1280;
+uint32_t* const cache_compile = (uint32_t* const) 1284;
 #endif

File diff suppressed because it is too large
+ 305 - 332
src/native/instructions.c


File diff suppressed because it is too large
+ 359 - 405
src/native/instructions_0f.c


+ 4 - 1
src/native/log.c

@@ -7,7 +7,10 @@
 
 #include "const.h"
 #include "global_pointers.h"
+void logop(int32_t, int32_t);
+
+void dbg_trace(void);
 
 #define dbg_log(...) { if(DEBUG) { printf(__VA_ARGS__); } }
-#define dbg_assert(condition) { if(DEBUG) { assert(condition); } }
+// In DEBUG builds, log the text of a failing condition and then assert on it.
+// The condition text is printed through "%s" so a '%' inside the expression
+// (e.g. a modulo) is never interpreted as a printf conversion.
+#define dbg_assert(condition) { if(DEBUG) { if(!(condition)) dbg_log("%s", #condition); assert(condition); } }
 #define dbg_assert_message(condition, message) { if(DEBUG && !(condition)) { dbg_log(message); assert(false); } }

+ 101 - 10
src/native/memory.c

@@ -1,4 +1,8 @@
 #include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+#include "const.h"
+#include "global_pointers.h"
 
 bool in_mapped_range(uint32_t addr)
 {
@@ -8,9 +12,56 @@ bool in_mapped_range(uint32_t addr)
 int32_t mmap_read8(uint32_t);
 int32_t mmap_read16(uint32_t);
 int32_t mmap_read32(uint32_t);
-void mmap_write8(uint32_t, uint8_t);
-void mmap_write16(uint32_t, uint16_t);
-void mmap_write32(uint32_t, uint32_t);
+void mmap_write8(uint32_t, int32_t);
+void mmap_write16(uint32_t, int32_t);
+void mmap_write32(uint32_t, int32_t);
+
+/*
+ * There are 3 primary ways a cached basic block will be dirtied:
+ * 1. A write dirties basic block A independently (A is clean and
+ * write came from outside A)
+ * 2. A write from within basic block A dirties itself
+ * 3. A run_instruction during compilation dirties itself
+
+ * #3 won't happen with generate_instruction so we don't
+ * account for it
+ */
+// Mark the physical byte range [start_addr, end_addr) as written so cached
+// blocks covering it are treated as stale. Compiles to a no-op unless
+// ENABLE_JIT is set.
+// For every byte in the range, the dirtiness counter of the byte's group
+// (address >> DIRTY_ARR_SHIFT) and of the preceding group is incremented, so a
+// multi-byte write bumps the same counters several times.
+void jit_dirty_cache(uint32_t start_addr, uint32_t end_addr)
+{
+#if ENABLE_JIT
+    assert(start_addr <= end_addr);
+    for(uint32_t i = start_addr; i < end_addr; i++)
+    {
+        uint32_t idx = i >> DIRTY_ARR_SHIFT;
+        // XXX: Overflow _can_ cause a stale cache (with
+        // group_status=0) to be mistakenly run, but the odds are low
+        // since it depends on a compiled block never being
+        // re-compiled or evicted for 2^32 times that
+        // another block in its group is dirtied
+        group_dirtiness[idx]++;
+
+        // We currently limit basic blocks to a length of
+        // MAX_BLOCK_LENGTH, which may still cause an overlap of 1
+        // page. The ongoing write may write to a block with a
+        // starting address lower than the start_addr of the write,
+        // i.e. in the previous page.
+        // XXX: Consider not generating blocks across boundaries
+
+        if(idx != 0)
+        {
+            group_dirtiness[idx - 1]++;
+        }
+    }
+#endif
+}
+
+// Invalidate all cached blocks by incrementing the first CACHE_LEN entries of
+// group_dirtiness, so no stored group_status matches its group any more.
+// NOTE(review): the bound is CACHE_LEN although group_dirtiness is indexed by
+// (address >> DIRTY_ARR_SHIFT) elsewhere — confirm the array has CACHE_LEN entries.
+// NOTE(review): unlike jit_dirty_cache(), this is not guarded by ENABLE_JIT.
+void jit_empty_cache()
+{
+    for(uint32_t i = 0; i < CACHE_LEN; i++)
+    {
+        group_dirtiness[i]++;
+    }
+}
 
 int32_t read8(uint32_t addr)
 {
@@ -40,7 +91,7 @@ int32_t read16(uint32_t addr)
     }
 }
 
-uint16_t read_aligned16(uint32_t addr)
+int32_t read_aligned16(uint32_t addr)
 {
     dbg_assert(addr >= 0 && addr < 0x80000000);
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK16;
@@ -69,6 +120,20 @@ int32_t read32s(uint32_t addr)
     }
 }
 
+// Read a 64-bit value from physical address `addr`.
+// Memory-mapped ranges are read as two 32-bit halves (low dword first) and
+// combined; everything else is read directly from the mem8 backing store.
+int64_t read64s(uint32_t addr)
+{
+    // Mask the address only while the A20 line is disabled, matching the other
+    // accessors in this file.
+    if(USE_A20 && !*a20_enabled) addr &= A20_MASK;
+
+    if(in_mapped_range(addr))
+    {
+        // Go through uint32_t so a negative low dword is not sign-extended into
+        // the high half, and so the shift operates on an unsigned value.
+        uint64_t low = (uint32_t)mmap_read32(addr);
+        uint64_t high = (uint32_t)mmap_read32(addr + 4);
+        return (int64_t)(low | high << 32);
+    }
+    else
+    {
+        return *(int64_t*)(mem8 + addr);
+    }
+}
+
 int32_t read_aligned32(uint32_t addr)
 {
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK32;
@@ -83,10 +148,12 @@ int32_t read_aligned32(uint32_t addr)
     }
 }
 
-void write8(uint32_t addr, uint8_t value)
+void write8(uint32_t addr, int32_t value)
 {
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK;
 
+    jit_dirty_cache(addr, addr + 1);
+
     if(in_mapped_range(addr))
     {
         mmap_write8(addr, value);
@@ -97,10 +164,12 @@ void write8(uint32_t addr, uint8_t value)
     }
 }
 
-void write16(uint32_t addr, uint16_t value)
+void write16(uint32_t addr, int32_t value)
 {
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK;
 
+    jit_dirty_cache(addr, addr + 2);
+
     if(in_mapped_range(addr))
     {
         mmap_write16(addr, value);
@@ -116,9 +185,12 @@ void write_aligned16(uint32_t addr, uint32_t value)
     dbg_assert(addr >= 0 && addr < 0x80000000);
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK16;
 
-    if(in_mapped_range(addr << 1))
+    uint32_t phys_addr = addr << 1;
+    jit_dirty_cache(phys_addr, phys_addr + 2);
+
+    if(in_mapped_range(phys_addr))
     {
-        mmap_write16(addr << 1, value);
+        mmap_write16(phys_addr, value);
     }
     else
     {
@@ -130,6 +202,8 @@ void write32(uint32_t addr, int32_t value)
 {
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK;
 
+    jit_dirty_cache(addr, addr + 4);
+
     if(in_mapped_range(addr))
     {
         mmap_write32(addr, value);
@@ -144,9 +218,12 @@ void write_aligned32(int32_t addr, int32_t value)
 {
     if(USE_A20 && !*a20_enabled) addr &= A20_MASK32;
 
-    if(in_mapped_range(addr << 2))
+    uint32_t phys_addr = addr << 2;
+    jit_dirty_cache(phys_addr, phys_addr + 4);
+
+    if(in_mapped_range(phys_addr))
     {
-        mmap_write32(addr << 2, value);
+        mmap_write32(phys_addr, value);
     }
     else
     {
@@ -154,3 +231,17 @@ void write_aligned32(int32_t addr, int32_t value)
     }
 }
 
+// Write the 64-bit `value` to physical address `addr`.
+// Memory-mapped ranges receive two 32-bit writes (low dword first); everything
+// else is stored directly into the mem8 backing store.
+void write64(uint32_t addr, int64_t value)
+{
+    if(USE_A20 && !*a20_enabled) addr &= A20_MASK;
+
+    // Like every other write path in this file, mark the written bytes dirty
+    // so cached blocks covering them are invalidated.
+    jit_dirty_cache(addr, addr + 8);
+
+    if(in_mapped_range(addr))
+    {
+        mmap_write32(addr + 0, value & 0xFFFFFFFF);
+        mmap_write32(addr + 4, value >> 32);
+    }
+    else
+    {
+        *(int64_t*)(mem8 + addr) = value;
+    }
+}

+ 64 - 26
src/native/misc_instr.c

@@ -15,6 +15,13 @@ void branch_not_taken();
 void writable_or_pagefault(int32_t, int32_t);
 
 
+static void write_reg8(int32_t index, int32_t value);
+static int32_t read_reg16(int32_t index);
+static void write_reg16(int32_t index, int32_t value);
+static int32_t read_reg32(int32_t index);
+static void write_reg32(int32_t index, int32_t value);
+
+
 int32_t getcf()
 {
     if(*flags_changed & 1)
@@ -106,9 +113,8 @@ void jmp_rel16(int32_t rel16)
     branch_taken();
 }
 
-void jmpcc8(bool condition)
+void jmpcc8(bool condition, int32_t imm8)
 {
-    int32_t imm8 = read_imm8s();
     if(condition)
     {
         *instruction_pointer += imm8;
@@ -120,10 +126,8 @@ void jmpcc8(bool condition)
     }
 }
 
-void jmpcc16(bool condition)
+void jmpcc16(bool condition, int32_t imm16)
 {
-    int32_t imm16 = read_imm16();
-
     if(condition)
     {
         jmp_rel16(imm16);
@@ -135,13 +139,11 @@ void jmpcc16(bool condition)
     }
 }
 
-void jmpcc32(bool condition)
+void jmpcc32(bool condition, int32_t imm32)
 {
-    int32_t op = read_imm32s();
-
     if(condition)
     {
-        *instruction_pointer += op;
+        *instruction_pointer += imm32;
         branch_taken();
     }
     else
@@ -150,21 +152,19 @@ void jmpcc32(bool condition)
     }
 }
 
-static void cmovcc16(bool condition)
+static void cmovcc16(bool condition, int32_t value, int32_t r)
 {
-    int32_t data = read_e16();
     if(condition)
     {
-        write_g16(data);
+        write_reg16(r, value);
     }
 }
 
-static void cmovcc32(bool condition)
+static void cmovcc32(bool condition, int32_t value, int32_t r)
 {
-    int32_t data = read_e32s();
     if(condition)
     {
-        write_g32(data);
+        write_reg32(r, value);
     }
 }
 
@@ -285,8 +285,12 @@ void pusha32()
     push32(reg32s[EDI]);
 }
 
-void setcc(bool condition) {
-    set_e8(condition);
+void setcc_reg(bool condition, int32_t r) {
+    write_reg8(r, condition ? 1 : 0);
+}
+
+void setcc_mem(bool condition, int32_t addr) {
+    safe_write8(addr, condition ? 1 : 0);
 }
 
 int32_t fpu_load_status_word();
@@ -320,10 +324,10 @@ void fxsave(uint32_t addr)
     // implementation dependent.
     for(int32_t i = 0; i < 8; i++)
     {
-        safe_write32(addr + 160 + (i << 4) +  0, reg_xmm32s[i << 2 | 0]);
-        safe_write32(addr + 160 + (i << 4) +  4, reg_xmm32s[i << 2 | 1]);
-        safe_write32(addr + 160 + (i << 4) +  8, reg_xmm32s[i << 2 | 2]);
-        safe_write32(addr + 160 + (i << 4) + 12, reg_xmm32s[i << 2 | 3]);
+        safe_write32(addr + 160 + (i << 4) +  0, reg_xmm[i].u32[0]);
+        safe_write32(addr + 160 + (i << 4) +  4, reg_xmm[i].u32[1]);
+        safe_write32(addr + 160 + (i << 4) +  8, reg_xmm[i].u32[2]);
+        safe_write32(addr + 160 + (i << 4) + 12, reg_xmm[i].u32[3]);
     }
 }
 
@@ -336,7 +340,7 @@ void fxrstor(uint32_t addr)
 
     if(new_mxcsr & ~MXCSR_MASK)
     {
-        //dbg_log("Invalid mxcsr bits: " + h((new_mxcsr & ~MXCSR_MASK) >>> 0, 8));
+        dbg_log("#gp Invalid mxcsr bits");
         trigger_gp(0);
     }
 
@@ -358,10 +362,44 @@ void fxrstor(uint32_t addr)
 
     for(int32_t i = 0; i < 8; i++)
     {
-        reg_xmm32s[i << 2 | 0] = safe_read32s(addr + 160 + (i << 4) +  0);
-        reg_xmm32s[i << 2 | 1] = safe_read32s(addr + 160 + (i << 4) +  4);
-        reg_xmm32s[i << 2 | 2] = safe_read32s(addr + 160 + (i << 4) +  8);
-        reg_xmm32s[i << 2 | 3] = safe_read32s(addr + 160 + (i << 4) + 12);
+        reg_xmm[i].u32[0] = safe_read32s(addr + 160 + (i << 4) +  0);
+        reg_xmm[i].u32[1] = safe_read32s(addr + 160 + (i << 4) +  4);
+        reg_xmm[i].u32[2] = safe_read32s(addr + 160 + (i << 4) +  8);
+        reg_xmm[i].u32[3] = safe_read32s(addr + 160 + (i << 4) + 12);
     }
 }
 
+// Store `data` into 8-bit register slot `r8` and return the value the slot
+// held before the store.
+int32_t xchg8(int32_t data, int32_t r8)
+{
+    int32_t previous = reg8[r8];
+    reg8[r8] = data;
+    return previous;
+}
+
+// Store `data` into 16-bit register slot `r16` and return the value the slot
+// held before the store.
+int32_t xchg16(int32_t data, int32_t r16)
+{
+    int32_t previous = reg16[r16];
+    reg16[r16] = data;
+    return previous;
+}
+
+// Swap the contents of AX with the 16-bit register slot `r16`.
+void xchg16r(int32_t r16)
+{
+    int32_t other = reg16[r16];
+    reg16[r16] = reg16[AX];
+    reg16[AX] = other;
+}
+
+// Store `data` into 32-bit register slot `r32` and return the value the slot
+// held before the store.
+int32_t xchg32(int32_t data, int32_t r32)
+{
+    int32_t previous = reg32s[r32];
+    reg32s[r32] = data;
+    return previous;
+}
+
+// Swap the contents of EAX with the 32-bit register slot `r32`.
+void xchg32r(int32_t r32)
+{
+    int32_t other = reg32s[r32];
+    reg32s[r32] = reg32s[EAX];
+    reg32s[EAX] = other;
+}

+ 7 - 0
src/native/modrm.c

@@ -31,7 +31,9 @@ static int32_t resolve_sib(bool);
     MODRM_ENTRY(0x40 | row, seg(((value) + read_imm8s() & 0xFFFF)))\
     MODRM_ENTRY(0x80 | row, seg(((value) + read_imm16() & 0xFFFF)))\
 
+extern int32_t resolve_modrm16(int32_t modrm_byte);
 
+/*
 static int32_t resolve_modrm16(int32_t modrm_byte)
 {
     switch(modrm_byte)
@@ -56,6 +58,7 @@ static int32_t resolve_modrm16(int32_t modrm_byte)
 
     return 0;
 }
+//*/
 
 #undef MODRM_ENTRY16
 
@@ -64,6 +67,9 @@ static int32_t resolve_modrm16(int32_t modrm_byte)
     MODRM_ENTRY(0x40 | row, seg((value) + read_imm8s()))\
     MODRM_ENTRY(0x80 | row, seg((value) + read_imm32s()))\
 
+extern int32_t resolve_modrm32(int32_t modrm_byte);
+
+/*
 static int32_t resolve_modrm32(int32_t modrm_byte)
 {
     switch(modrm_byte)
@@ -90,6 +96,7 @@ static int32_t resolve_modrm32(int32_t modrm_byte)
 
     return 0;
 }
+//*/
 
 #undef MODRM_ENTRY32
 #undef MODRM_ENTRY

+ 67 - 0
src/native/profiler/profiler.c

@@ -0,0 +1,67 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "profiler.h"
+
+#if ENABLE_PROFILER
+
+// get_time() value recorded by profiler_init(); profiler_print() reports
+// elapsed time relative to it
+double profiler_init_time = 0;
+
+// Record the profiler start time and clear every slot in profiler_arr.
+void profiler_init()
+{
+    profiler_init_time = get_time();
+
+    uint32_t index = 0;
+    while(index < PROFILER_NAME_COUNT)
+    {
+        profiler_arr[index].total = 0;
+        profiler_arr[index].current_start = 0;
+        profiler_arr[index].capturing = false;
+        index++;
+    }
+}
+
+// Begin timing the activity `name`. Starting an activity that is already
+// being timed trips the assertion.
+void profiler_start(enum profile_name name)
+{
+    assert(!profiler_arr[name].capturing);
+
+    profiler_arr[name].current_start = get_time();
+    profiler_arr[name].capturing = true;
+}
+
+// Stop timing the activity `name` and add the elapsed interval to its total.
+// Does nothing when the activity is not currently being timed.
+void profiler_end(enum profile_name name)
+{
+    struct profiler_data *slot = &profiler_arr[name];
+    if(!slot->capturing)
+    {
+        return;
+    }
+
+    slot->total += get_time() - slot->current_start;
+    slot->current_start = 0;
+    slot->capturing = false;
+}
+
+// Print the time elapsed since profiler_init() followed by, for every slot in
+// profiler_arr, its index, its accumulated total and that total as a
+// percentage of the elapsed time. All values are truncated to integers.
+void profiler_print()
+{
+    double init_elapsed = get_time() - profiler_init_time;
+    printf("Elapsed: %d\n", (int32_t) init_elapsed);
+    for(uint32_t i = 0; i < PROFILER_NAME_COUNT; i++)
+    {
+        double cur_total = profiler_arr[i].total;
+        // Without this guard a zero elapsed time yields NaN/inf, and converting
+        // that to an integer is undefined
+        double percentage = init_elapsed > 0 ? 100 * cur_total / init_elapsed : 0;
+        printf(
+            "\nIndex:\t%u"
+            "\nTotal:\t%d"
+            "\nPercentage:\t%d%%\n",
+            (unsigned) i,
+            (int32_t) cur_total,
+            (int32_t) percentage
+        );
+    }
+}
+
+#else
+// Disable profiler
+
+// No-op implementations so that callers need no conditional compilation
+void profiler_init()
+{
+}
+
+void profiler_start(enum profile_name name)
+{
+    (void) name;
+}
+
+void profiler_end(enum profile_name name)
+{
+    (void) name;
+}
+
+void profiler_print()
+{
+}
+
+#endif

+ 29 - 0
src/native/profiler/profiler.h

@@ -0,0 +1,29 @@
+#include <stdbool.h>
+#ifndef _PROFILER_H
+#define _PROFILER_H
+
+// Identifies one profiled activity; each value is also the index of that
+// activity's slot in profiler_arr
+enum profile_name {
+    P_IDLE,
+    P_GEN_INSTR,
+    P_DO_MANY_CYCLES,
+    P_RUN_FROM_CACHE
+};
+
+// Number of enumerators in profile_name; update it when adding one
+#define PROFILER_NAME_COUNT 4
+
+// Timing state of one profiled activity
+struct profiler_data {
+    // Sum of all completed intervals
+    double total;
+    // Start time of the interval in progress, 0 when none is in progress
+    double current_start;
+    // Whether an interval is currently in progress
+    bool capturing;
+// NOTE(review): this defines the array in a header, so including the header
+// from more than one translation unit would duplicate the symbol -- confirm it
+// is only included once
+} profiler_arr[PROFILER_NAME_COUNT] = {{0, 0, false}};
+
+
+void profiler_init();
+void profiler_start(enum profile_name name);
+void profiler_end(enum profile_name name);
+void profiler_print();
+
+// JS import
+double get_time();
+
+#endif

+ 49 - 0
src/native/shared.h

@@ -0,0 +1,49 @@
+#ifndef _SHARED_H
+#define _SHARED_H
+
+#include <stdint.h>
+#include "const.h"
+
+// A 128-bit value that can be viewed as 16 bytes, 8 words, 4 dwords or
+// 2 qwords (all unsigned)
+union reg128 {
+    uint8_t   u8[16];
+    uint16_t  u16[8];
+    uint32_t  u32[4];
+    uint64_t  u64[2];
+};
+// Compile-time size check: the array length evaluates to -1, which does not
+// compile, unless the union is exactly 16 bytes
+typedef char assert_size_reg128[(sizeof(union reg128) == 16) * 2 - 1];
+
+// A 64-bit value that can be viewed as signed or unsigned integers of 8, 16 or
+// 32 bits, as one unsigned 64-bit integer, or as one double
+union reg64 {
+    int8_t   i8[8];
+    int16_t  i16[4];
+    int32_t  i32[2];
+    uint8_t   u8[8];
+    uint16_t  u16[4];
+    uint32_t  u32[2];
+    uint64_t  u64[1];
+    double   f64[1];
+};
+// Compile-time size check: the array length evaluates to -1, which does not
+// compile, unless the union is exactly 8 bytes
+typedef char assertion_size_reg64[(sizeof(union reg64) == 8) * 2 - 1];
+
+// One entry of the JIT cache, describing a single basic block. The table
+// jit_cache_arr holds CACHE_LEN of these, all zero-initialised.
+struct code_cache {
+    // Address of the start of the basic block
+    uint32_t start_addr;
+    // Address of the instruction immediately after the basic block ends
+    uint32_t end_addr;
+    // NOTE(review): presumably the opcodes recorded for the block, with `len`
+    // counting how many of the 100 slots are in use -- confirm against the
+    // code that fills the cache
+    int32_t opcode[100];
+    int32_t len;
+    // Cleanliness status of the entry's "group" (based on
+    // DIRTY_ARR_SHIFT). Value only has meaning in relation with the
+    // group_dirtiness value.
+    uint32_t group_status;
+} jit_cache_arr[CACHE_LEN] = {{0, 0, {0}, 0, 0}};
+
+// NOTE(review): the objects below are definitions with initialisers, so
+// including this header from more than one translation unit would duplicate
+// the symbols -- confirm it is only included once
+
+// Flag indicating whether the instruction that just ran was a jump of some sort
+uint32_t jit_jump = 0;
+
+// Count of how many times prime_hash(address) has been called through a jump
+int32_t hot_code_addresses[HASH_PRIME] = {0};
+// An array indicating the current "initial group status" for entries that map
+// to the same group due to the shift
+// (one element per possible value of a 32-bit address shifted right by
+// DIRTY_ARR_SHIFT)
+uint32_t group_dirtiness[1 + (0xffffffff >> DIRTY_ARR_SHIFT)] = {0};
+
+#endif

+ 49 - 48
src/native/string.c

@@ -1,3 +1,5 @@
+#include <stdint.h>
+
 #define MAX_COUNT_PER_CYCLE 0x1000
 #define MIN(x, y) (x < y ? x : y)
 
@@ -29,7 +31,8 @@ void movsb_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -1 : 1;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -48,7 +51,7 @@ void movsb_rep()
         cont = --count != 0;
     }
     while(cont && cycle_counter--);
-    int32_t diff = size * (start_count - count) | 0;
+    int32_t diff = size * (start_count - count);
     add_reg_asize(EDI, diff);
     add_reg_asize(ESI, diff);
     set_ecx_asize(count);
@@ -57,7 +60,6 @@ void movsb_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void movsb_no_rep()
@@ -90,7 +92,8 @@ void movsw_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -2 : 2;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -112,7 +115,7 @@ void movsw_rep()
             cont = --count != 0;
         }
         while(cont && cycle_counter--);
-        int32_t diff = size * (start_count - count) | 0;
+        int32_t diff = size * (start_count - count);
         add_reg_asize(EDI, diff);
         add_reg_asize(ESI, diff);
         set_ecx_asize(count);
@@ -135,7 +138,6 @@ void movsw_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void movsw_no_rep()
@@ -168,7 +170,8 @@ void movsd_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -4 : 4;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -190,7 +193,7 @@ void movsd_rep()
             cont = --count != 0;
         }
         while(cont && cycle_counter--);
-        int32_t diff = size * (start_count - count) | 0;
+        int32_t diff = size * (start_count - count);
         add_reg_asize(EDI, diff);
         add_reg_asize(ESI, diff);
         set_ecx_asize(count);
@@ -213,7 +216,6 @@ void movsd_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void movsd_no_rep()
@@ -247,7 +249,8 @@ void cmpsb_rep()
     int32_t data_src, data_dest;
     int32_t size = *flags & FLAG_DIRECTION ? -1 : 1;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -279,7 +282,6 @@ void cmpsb_rep()
     }
 
     cmp8(data_src, data_dest);
-    diverged();
 }
 
 void cmpsb_no_rep()
@@ -317,7 +319,8 @@ void cmpsw_rep()
     int32_t data_src, data_dest;
     int32_t size = *flags & FLAG_DIRECTION ? -2 : 2;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -341,7 +344,7 @@ void cmpsw_rep()
             cont = --count != 0 && (data_src == data_dest) == is_repz;
         }
         while(cont && cycle_counter--);
-        int32_t diff = size * (start_count - count) | 0;
+        int32_t diff = size * (start_count - count);
         add_reg_asize(EDI, diff);
         add_reg_asize(ESI, diff);
         set_ecx_asize(count);
@@ -367,7 +370,6 @@ void cmpsw_rep()
     }
 
     cmp16(data_src, data_dest);
-    diverged();
 }
 
 void cmpsw_no_rep()
@@ -405,7 +407,8 @@ void cmpsd_rep()
     int32_t data_src, data_dest;
     int32_t size = *flags & FLAG_DIRECTION ? -4 : 4;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -429,7 +432,7 @@ void cmpsd_rep()
             cont = --count != 0 && (data_src == data_dest) == is_repz;
         }
         while(cont && cycle_counter--);
-        int32_t diff = size * (start_count - count) | 0;
+        int32_t diff = size * (start_count - count);
         add_reg_asize(EDI, diff);
         add_reg_asize(ESI, diff);
         set_ecx_asize(count);
@@ -455,7 +458,6 @@ void cmpsd_rep()
     }
 
     cmp32(data_src, data_dest);
-    diverged();
 }
 
 void cmpsd_no_rep()
@@ -492,7 +494,8 @@ void stosb_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -1 : 1;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -517,7 +520,6 @@ void stosb_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void stosb_no_rep()
@@ -549,7 +551,8 @@ void stosw_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -2 : 2;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -589,7 +592,6 @@ void stosw_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void stosw_no_rep()
@@ -621,7 +623,8 @@ void stosd_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -4 : 4;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -661,7 +664,6 @@ void stosd_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void stosd_no_rep()
@@ -691,7 +693,8 @@ void lodsb_rep()
 {
     int32_t src = get_seg_prefix(DS) + get_reg_asize(ESI);
     int32_t size = *flags & FLAG_DIRECTION ? -1 : 1;
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -716,7 +719,6 @@ void lodsb_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void lodsb_no_rep()
@@ -746,7 +748,8 @@ void lodsw_rep()
     int32_t src = get_seg_prefix(DS) + get_reg_asize(ESI);
     int32_t size = *flags & FLAG_DIRECTION ? -2 : 2;
 
-    uint32_t count = ((uint32_t) get_reg_asize(ECX)) >> 0;
+    uint32_t count = ((uint32_t) get_reg_asize(ECX));
+    diverged();
     if(count == 0) return;
     bool cont = false;
     uint32_t cycle_counter = MAX_COUNT_PER_CYCLE;
@@ -762,7 +765,6 @@ void lodsw_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void lodsw_no_rep()
@@ -793,7 +795,8 @@ void lodsd_rep()
     int32_t src = get_seg_prefix(DS) + get_reg_asize(ESI);
     int32_t size = *flags & FLAG_DIRECTION ? -4 : 4;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t cycle_counter = MAX_COUNT_PER_CYCLE;
@@ -809,7 +812,6 @@ void lodsd_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void lodsd_no_rep()
@@ -841,7 +843,8 @@ void scasb_rep()
     int32_t data_dest;
     int32_t data_src = reg8[AL];
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -868,7 +871,6 @@ void scasb_rep()
         *instruction_pointer = *previous_ip;
     }
     cmp8(data_src, data_dest);
-    diverged();
 }
 
 void scasb_no_rep()
@@ -903,7 +905,8 @@ void scasw_rep()
     int32_t data_dest;
     int32_t data_src = reg16[AL];
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -945,7 +948,6 @@ void scasw_rep()
         *instruction_pointer = *previous_ip;
     }
     cmp16(data_src, data_dest);
-    diverged();
 }
 
 void scasw_no_rep()
@@ -980,7 +982,8 @@ void scasd_rep()
     int32_t data_dest;
     int32_t data_src = reg32s[EAX];
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -1022,7 +1025,6 @@ void scasd_rep()
         *instruction_pointer = *previous_ip;
     }
     cmp32(data_src, data_dest);
-    diverged();
 }
 
 void scasd_no_rep()
@@ -1059,7 +1061,8 @@ void insb_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -1 : 1;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -1084,7 +1087,6 @@ void insb_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void insb_no_rep()
@@ -1121,7 +1123,8 @@ void insw_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -2 : 2;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -1161,7 +1164,6 @@ void insw_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void insw_no_rep()
@@ -1198,7 +1200,8 @@ void insd_rep()
     int32_t dest = get_seg(ES) + get_reg_asize(EDI);
     int32_t size = *flags & FLAG_DIRECTION ? -4 : 4;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -1238,7 +1241,6 @@ void insd_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void insd_no_rep()
@@ -1275,7 +1277,8 @@ void outsb_rep()
     int32_t src = get_seg_prefix(DS) + get_reg_asize(ESI);
     int32_t size = *flags & FLAG_DIRECTION ? -1 : 1;
 
-        int32_t count = get_reg_asize(ECX) >> 0;
+        int32_t count = get_reg_asize(ECX);
+    diverged();
         if(count == 0) return;
         int32_t cont = false;
         int32_t start_count = count;
@@ -1300,7 +1303,6 @@ void outsb_rep()
         {
             *instruction_pointer = *previous_ip;
         }
-    diverged();
     }
 
 void outsb_no_rep()
@@ -1336,7 +1338,8 @@ void outsw_rep()
     int32_t src = get_seg_prefix(DS) + get_reg_asize(ESI);
     int32_t size = *flags & FLAG_DIRECTION ? -2 : 2;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -1376,7 +1379,6 @@ void outsw_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void outsw_no_rep()
@@ -1412,7 +1414,8 @@ void outsd_rep()
     int32_t src = get_seg_prefix(DS) + get_reg_asize(ESI);
     int32_t size = *flags & FLAG_DIRECTION ? -4 : 4;
 
-    int32_t count = get_reg_asize(ECX) >> 0;
+    int32_t count = get_reg_asize(ECX);
+    diverged();
     if(count == 0) return;
     int32_t cont = false;
     int32_t start_count = count;
@@ -1452,7 +1455,6 @@ void outsd_rep()
     {
         *instruction_pointer = *previous_ip;
     }
-    diverged();
 }
 
 void outsd_no_rep()
@@ -1479,4 +1481,3 @@ void outsd()
         outsd_no_rep();
     }
 }
-

+ 12 - 2
src/ps2.js

@@ -1,5 +1,8 @@
 "use strict";
 
+/** @const */
+let PS2_LOG_VERBOSE = false;
+
 /**
  * @constructor
  * @param {CPU} cpu
@@ -270,7 +273,10 @@ PS2.prototype.send_mouse_packet = function(dx, dy)
     this.mouse_buffer.push(delta_x);
     this.mouse_buffer.push(delta_y);
 
-    dbg_log("adding mouse packets: " + [info_byte, dx, dy], LOG_PS2);
+    if(PS2_LOG_VERBOSE)
+    {
+        dbg_log("adding mouse packets: " + [info_byte, dx, dy], LOG_PS2);
+    }
 
     this.mouse_irq();
 };
@@ -320,7 +326,11 @@ PS2.prototype.port60_read = function()
     {
         this.cpu.device_lower_irq(12);
         this.last_port60_byte = this.mouse_buffer.shift();
-        dbg_log("Port 60 read (mouse): " + h(this.last_port60_byte), LOG_PS2);
+
+        if(PS2_LOG_VERBOSE)
+        {
+            dbg_log("Port 60 read (mouse): " + h(this.last_port60_byte), LOG_PS2);
+        }
 
         if(this.mouse_buffer.length >= 1)
         {

+ 144 - 0
tests/codegen.js

@@ -0,0 +1,144 @@
+const fs = require('fs');
+const { TextDecoder } = require('util');
+
+//XXX: use a non-hacky method maybe
+const interfaceCode = fs.readFileSync(__dirname + '/../src/codegen.js', 'utf8');
+eval(interfaceCode);
+
+console.assert(typeof Codegen === "function");
+
+const codegenModuleBuffer = fs.readFileSync(__dirname + '/../build/codegen-test.wasm');
+
+const dec = new TextDecoder('utf-8');
+
+// Fixed values returned by the stub imports passed to the wasm module below
+const vals = {
+    imm8: 1,
+    imm8s: 1,
+    imm16: 2,
+    imm32s: 3,
+    asize_32: false,
+    reg16: 0,
+    reg32s: 0,
+    instruction_pointer: 0,
+    previous_ip: 0,
+};
+
+// Instantiate the codegen wasm module with stubbed imports, wrap the result
+// in a Codegen object and hand that to test()
+load_wasm(codegenModuleBuffer, {
+        env: {
+            _read_imm8() { return vals.imm8; },
+            _read_imm8s() { return vals.imm8s; },
+            _read_imm16() { return vals.imm16; },
+            _read_imm32s() { return vals.imm32s; },
+            _is_asize_32() { return vals.asize_32; },
+
+            // static pointer imports
+            g$_reg16() { return vals.reg16; },
+            g$_reg32s() { return vals.reg32s; },
+            g$_instruction_pointer() { return vals.instruction_pointer; },
+            g$_previous_ip() { return vals.previous_ip; },
+        }
+    })
+    .then(function(wm) {
+        return new Codegen(wm);
+    })
+    .then(test);
+
+/**
+ * Generates a module that calls fn0, fn1 and fn2 and updates the instruction
+ * pointers, runs it, and checks that the imported functions were called with
+ * the expected arguments in the expected order. A mismatch is reported on the
+ * console and through a non-zero process exit code.
+ *
+ * @param {Codegen} gen
+ */
+function test(gen)
+{
+    // An empty module must be producible as well
+    gen.reset();
+    gen.finish();
+
+    let buf = gen.get_module_code();
+
+    gen.reset();
+    gen.fn0('fn0');
+    gen.fn1('fn1', 0);
+    gen.fn2('fn2', 0, 1);
+    gen.increment_instruction_pointer(10);
+    gen.set_previous_eip();
+    gen.finish();
+
+    buf = gen.get_module_code();
+    // Keep a copy of the generated module on disk
+    fs.writeFileSync(__dirname + '/../build/myjit.wasm', buf);
+
+    const module = new WebAssembly.Module(buf);
+
+    const expected = [
+        ['fn0'],
+        ['fn1', 0],
+        ['fn2', 0, 1],
+    ];
+
+    // Filled by the imported functions in call order
+    const store = [];
+
+    const imports = {
+        e: {
+            fn0() { store.push(['fn0']); },
+            fn1(arg0) { store.push(['fn1', arg0]); },
+            fn2(arg0, arg1) { store.push(['fn2', arg0, arg1]); },
+            get_seg_prefix_ds() {},
+            get_seg_prefix_ss() {},
+            get_seg_prefix() {},
+            m: new WebAssembly.Memory({ initial: 256 * 1024 * 1024 / 64 / 1024 }),
+        },
+    };
+    const o = new WebAssembly.Instance(module, imports);
+    o.exports.f();
+    const view = new Uint32Array(imports.e.m.buffer);
+    // NOTE(review): the pointers are used directly as Uint32Array indices,
+    // which is only unambiguous while both are 0 -- confirm whether they are
+    // byte offsets before changing the values in `vals`
+    console.assert(view[vals.instruction_pointer] === 10);
+    console.assert(view[vals.previous_ip] === 10);
+    if (JSON.stringify(store) === JSON.stringify(expected))
+    {
+        console.log('Test passed');
+    }
+    else
+    {
+        console.error('Test failed');
+        console.log('Expected:', expected);
+        console.log('Got:', store);
+        // Make the failure visible to whatever invoked the test
+        process.exitCode = 1;
+    }
+}
+
+/**
+ * Compiles and instantiates a wasm module, adding the environment entries
+ * (assert handler, memory, table and their bases) to `imports.env`.
+ *
+ * @param {BufferSource} buffer  wasm binary
+ * @param {Object=} imports  import object; created when omitted
+ * @param {function(Object)=} cb  receives the result when given
+ * @return {Promise}  resolves to { mem, funcs, instance, imports } when no
+ *     callback is given, otherwise to undefined
+ */
+function load_wasm(buffer, imports, cb)
+{
+    imports = imports || {};
+
+    // XXX: These should not be fixed in M
+    const STATIC_MEMORY_BASE = 256 - 32;
+    const WASM_MEMORY_SIZE = 256;
+    const MEGABYTE = 1024 * 1024;
+    const WASM_PAGE_SIZE = 64 * 1024;
+
+    const fill_env = env => {
+        env['___assert_fail'] = (a, b, c, d) => {
+            console.error('Assertion Failed', a, b, c, d);
+            dbg_assert(false);
+        };
+        env['memoryBase'] = STATIC_MEMORY_BASE * MEGABYTE;
+        env['tableBase'] = 0;
+        env['memory'] = new WebAssembly.Memory({ ['initial']: WASM_MEMORY_SIZE * MEGABYTE / WASM_PAGE_SIZE, });
+        env['table'] = new WebAssembly.Table({ ['initial']: 18, ['element']: 'anyfunc' });
+    };
+
+    const deliver = ({ instance, module }) => {
+        const ret = {
+            mem: imports['env']['memory'],
+            funcs: instance['exports'],
+            instance,
+            imports,
+        };
+        if (typeof cb !== "function")
+        {
+            return ret;
+        }
+        cb(ret);
+    };
+
+    return WebAssembly.compile(buffer)
+        .then(module => {
+            if (!imports['env']) {
+                imports['env'] = {};
+            }
+            fill_env(imports['env']);
+            return WebAssembly.instantiate(module, imports).then(instance => ({ instance, module }));
+        })
+        .then(deliver);
+}
+

+ 13 - 3
tests/full/run.js

@@ -283,6 +283,7 @@ function run_test(test, done)
         bios: { url: bios },
         vga_bios: { url: vga_bios },
         autostart: true,
+        memory_size: 128 * 1024 * 1024,
     };
 
     if(test.cdrom)
@@ -295,7 +296,7 @@ function run_test(test, done)
     }
     if(test.hda)
     {
-        settings.hda = { url: test.hda };
+        settings.hda = { url: test.hda, async: true, };
     }
 
     if(test.expected_texts)
@@ -431,10 +432,19 @@ function run_test(test, done)
         if(on_text.length)
         {
             let expected = on_text[0].text;
+
             if(x < expected.length && bytearray_starts_with(line, expected))
             {
                 var action = on_text.shift();
-                emulator.keyboard_send_text(action.run);
+
+                if(action.after)
+                {
+                    setTimeout(() => emulator.keyboard_send_text(action.run), action.after);
+                }
+                else
+                {
+                    emulator.keyboard_send_text(action.run);
+                }
             }
         }
     });
@@ -443,7 +453,7 @@ function run_test(test, done)
     {
         if(action.on_text)
         {
-            on_text.push({ text: string_to_bytearray(action.on_text), run: action.run, });
+            on_text.push({ text: string_to_bytearray(action.on_text), run: action.run, after: action.after, });
         }
     });
 }

+ 11 - 0
tests/jit-paging/Makefile

@@ -0,0 +1,11 @@
+CC=gcc
+CC_I386=$(CC) -m32
+CFLAGS=-Wall -g -fno-strict-aliasing -fPIC -static
+LDFLAGS=
+
+
+# Build the test program as a 32-bit executable
+test-jit: test-jit.c
+	$(CC_I386) $(CFLAGS) $(LDFLAGS) -o $@ $(<D)/test-jit.c
+
+# clean produces no file; declare it phony so a file named "clean" cannot
+# prevent it from running
+.PHONY: clean
+clean:
+	rm -f test-jit

+ 11 - 0
tests/jit-paging/README.md

@@ -0,0 +1,11 @@
+# About
+
+These tests map two adjacent virtual pages to the same physical frame. Code is
+written through one page and executed from the other, then overwritten and
+executed again, in order to trigger cache activity. Unlike `/tests/jit/`, this
+folder is meant to test the JIT in protected mode with paging set up, not in
+real mode.
+
+# Run
+
+- Obtain the `linux3.iso` image (see [Readme.md](../../Readme.md))
+- Run `make jitpagingtests` in the root of the project

+ 75 - 0
tests/jit-paging/run.js

@@ -0,0 +1,75 @@
+#!/usr/bin/env node
+"use strict";
+
+var V86 = require("../../build/libv86.js").V86;
+var fs = require("fs");
+
+var test_executable = new Uint8Array(fs.readFileSync(__dirname + "/test-jit"));
+
+// Start an emulator that boots linux3.iso with 32 MB of memory, an (initially
+// empty) filesystem and logging turned off
+var emulator = new V86({
+    bios: { url: __dirname + "/../../bios/seabios.bin" },
+    vga_bios: { url: __dirname + "/../../bios/vgabios.bin" },
+    cdrom: { url: __dirname + "/../../images/linux3.iso" },
+    autostart: true,
+    memory_size: 32 * 1024 * 1024,
+    filesystem: {},
+    log_level: 0
+});
+
+// As soon as the emulator has started, add the test binary to its filesystem
+// (the commands sent later expect to find it under /mnt in the guest)
+emulator.bus.register("emulator-started", function()
+{
+    console.error("Booting now, please stand by");
+    emulator.create_file("test-jit", test_executable);
+});
+
+var ran_command = false;
+var line = "";
+
+// Drives the test over the serial console: collects output into lines, runs
+// the test binary once the shell prompt shows up, and checks the result file
+// after the guest has echoed "test finished".
+emulator.add_listener("serial0-output-char", function(chr)
+{
+    // Ignore everything except printable ASCII, newline and tab
+    if(chr < " " && chr !== "\n" && chr !== "\t" || chr > "~")
+    {
+        return;
+    }
+
+    // Only set when this character completes a line
+    var new_line;
+
+    if(chr === "\n")
+    {
+        new_line = line;
+        console.error("Serial: %s", line);
+        line = "";
+    }
+    else
+    {
+        line += chr;
+    }
+
+    if(!ran_command && line.endsWith("~% "))
+    {
+        ran_command = true;
+        emulator.serial0_send("chmod +x /mnt/test-jit\n");
+        emulator.serial0_send("/mnt/test-jit > /mnt/result\n");
+        // The quotes keep the echoed command itself from matching "test finished"
+        emulator.serial0_send("echo test fini''shed\n");
+    }
+
+    if(new_line && new_line.includes("test finished"))
+    {
+        console.error("Done. Reading result ...");
+
+        emulator.read_file("/result", function(err, data)
+            {
+                emulator.stop();
+                if(err) throw err;
+                // Buffer.from replaces the deprecated Buffer constructor
+                let result = Buffer.from(data).toString();
+                if(result !== 'test passed\n')
+                {
+                    console.error('[!] Error. Result was:', result);
+                    process.exit(1);
+                }
+                else
+                {
+                    console.log('[+] Test passed');
+                }
+            });
+    }
+
+});

+ 84 - 0
tests/jit-paging/test-jit.c

@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#include <unistd.h>
+
+// Returns the n-th Fibonacci number (fib(0) = 0, fib(1) = 1), or 0 for
+// negative n. main() copies this function's machine code to another page and
+// runs it there, so it must not call other functions.
+int fib(int n)
+{
+    int older = 0;
+    int newer = 1;
+    int result = 0;
+
+    for(int k = 0; k <= n; k++)
+    {
+        if(k < 2)
+        {
+            result = k;
+            continue;
+        }
+
+        result = older + newer;
+        older = newer;
+        newer = result;
+    }
+
+    return result;
+}
+
+// Returns the marker value 0x42. main() copies this function over the code
+// that was executed before and expects the marker from the next call.
+int pass_test()
+{
+    return 0x42;
+}
+
+// Prints `msg` (or a placeholder when it is NULL) to stderr and aborts the
+// process. Does not return.
+void fatal(char *msg)
+{
+    const char *text = "no message";
+    if(msg)
+    {
+        text = msg;
+    }
+
+    fprintf(stderr, "*** FATAL ERROR: %s\n", text);
+    fflush(stderr);
+    abort();
+}
+
+// Maps one page of a temporary file at two adjacent addresses, one view
+// writable and one executable. Code written through the first view is run
+// through the second, then replaced and run again. Prints "test passed" when
+// the replaced code is what gets executed, "test failed" otherwise.
+//
+// Returns 0 on success and 1 when the final check fails; setup errors abort.
+int main()
+{
+    static char filename[] = "/tmp/DoubleMapXXXXXX";
+    int fd = mkstemp(filename);
+    if(fd == -1)
+    {
+        fatal("mkstemp");
+    }
+    // The name is not needed again; the open descriptor keeps the file alive
+    unlink(filename);
+    if(ftruncate(fd, PAGE_SIZE) == -1)
+    {
+        fatal("ftruncate");
+    }
+
+    // Reserve two adjacent pages; the first one is the writable view
+    char *const write_addr = mmap(0, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if(write_addr == MAP_FAILED)
+    {
+        // Must be checked before write_addr is used as the base of a fixed mapping
+        fatal("mmap");
+    }
+
+    // Replace the second page by an executable view of the same file page
+    char *const exec_addr = mmap(write_addr+PAGE_SIZE, PAGE_SIZE,
+                                 PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED, fd, 0);
+    if(exec_addr == MAP_FAILED)
+    {
+        fatal("mmap");
+    }
+
+    // A whole page is copied: the function followed by whatever comes after it
+    size_t size = PAGE_SIZE;
+    memcpy(write_addr, fib, size);
+
+    int (*fun_pointer)() = (void*)exec_addr;
+
+    // Give the JIT something to potentially cache
+    for(int i = 0; i < 15000; i++)
+    {
+        if(fun_pointer(20) != 6765)
+        {
+            fatal("fibonacci");
+        }
+    }
+
+    // Overwrite the code that has just been executed many times
+    int status = 0;
+    memcpy(write_addr, pass_test, size);
+    if(fun_pointer() == 0x42)
+    {
+        printf("test passed\n");
+    }
+    else
+    {
+        // Stale code was executed
+        printf("test failed\n");
+        status = 1;
+    }
+
+    // Both pages belong to the range reserved by the first mmap
+    munmap(write_addr, 2 * PAGE_SIZE);
+    close(fd);
+    return status;
+}

+ 3 - 1
tests/nasm/Makefile

@@ -1,13 +1,14 @@
 source_files := $(wildcard build/*.asm)
 source_files += $(addprefix build/,$(wildcard *.asm))
 
+obj_files := $(patsubst %.asm,%.o,$(source_files))
 host_executables := $(patsubst %.asm,%.bin,$(source_files))
 v86_executables := $(patsubst %.asm,%.img,$(source_files))
 host_fixtures := $(patsubst %.asm,%.fixture,$(source_files))
 
 inc_files := $(addprefix build/,$(wildcard *.inc))
 
-all: $(source_files) $(host_executables) $(v86_executables) $(host_fixtures)
+all: $(source_files) $(obj_files) $(inc_files) $(host_executables) $(v86_executables) $(host_fixtures)
 .PHONY: all
 
 build/%.o: build/%.asm $(inc_files)
@@ -31,6 +32,7 @@ build/%.asm: %.asm
 build/%.inc: %.inc
 	mkdir -p build; cp $< $@
 
+.PHONY: clean
 clean:
 	rm -f *.o *.bin *.img *.fixture gen_*.asm # old location
 	rm -f build/*.o build/*.bin build/*.img build/*.fixture build/*.asm

File diff suppressed because it is too large
+ 89 - 946
tests/nasm/create_tests.js


+ 18 - 16
tests/nasm/gdbauto

@@ -1,5 +1,7 @@
 # Invocation: gdb -x gdbauto xyz.bin
 
+set $STACK_TOP=0x120000
+
 break loop
 run
 
@@ -67,22 +69,22 @@ printf "    %d,\n", $xmm7.v4_int32[2]
 printf "    %d,\n", $xmm7.v4_int32[3]
 printf "    \n"
 
-printf "    %d,\n", *(int*)(0x120000-64)
-printf "    %d,\n", *(int*)(0x120000-60)
-printf "    %d,\n", *(int*)(0x120000-56)
-printf "    %d,\n", *(int*)(0x120000-52)
-printf "    %d,\n", *(int*)(0x120000-48)
-printf "    %d,\n", *(int*)(0x120000-44)
-printf "    %d,\n", *(int*)(0x120000-40)
-printf "    %d,\n", *(int*)(0x120000-36)
-printf "    %d,\n", *(int*)(0x120000-32)
-printf "    %d,\n", *(int*)(0x120000-28)
-printf "    %d,\n", *(int*)(0x120000-24)
-printf "    %d,\n", *(int*)(0x120000-20)
-printf "    %d,\n", *(int*)(0x120000-16)
-printf "    %d,\n", *(int*)(0x120000-12)
-printf "    %d,\n", *(int*)(0x120000-8)
-printf "    %d,\n", *(int*)(0x120000-4)
+printf "    %d,\n", *(int*)($STACK_TOP-64)
+printf "    %d,\n", *(int*)($STACK_TOP-60)
+printf "    %d,\n", *(int*)($STACK_TOP-56)
+printf "    %d,\n", *(int*)($STACK_TOP-52)
+printf "    %d,\n", *(int*)($STACK_TOP-48)
+printf "    %d,\n", *(int*)($STACK_TOP-44)
+printf "    %d,\n", *(int*)($STACK_TOP-40)
+printf "    %d,\n", *(int*)($STACK_TOP-36)
+printf "    %d,\n", *(int*)($STACK_TOP-32)
+printf "    %d,\n", *(int*)($STACK_TOP-28)
+printf "    %d,\n", *(int*)($STACK_TOP-24)
+printf "    %d,\n", *(int*)($STACK_TOP-20)
+printf "    %d,\n", *(int*)($STACK_TOP-16)
+printf "    %d,\n", *(int*)($STACK_TOP-12)
+printf "    %d,\n", *(int*)($STACK_TOP-8)
+printf "    %d,\n", *(int*)($STACK_TOP-4)
 printf "    \n"
 
 printf "    %d\n", $eflags

+ 1 - 1
tests/nasm/header.inc

@@ -10,7 +10,7 @@ align 4
     dd CHECKSUM
 
 section .bss
-    resb 128*1024
+    resb 128*1024   ; 0x20000
 
 stack_top:
 

+ 21 - 0
tests/nasm/prand.js

@@ -0,0 +1,21 @@
+"use strict";
+const assert = require("assert");
+
+/**
+ * Constructs a pseudo-random value generator. The seed must be an integer;
+ * it is reduced modulo 2147483647 and shifted into the positive range when
+ * the remainder is zero or negative.
+ */
+function Random(seed) {
+    assert.equal(typeof seed, "number");
+    const reduced = seed % 2147483647;
+    this._seed = reduced <= 0 ? reduced + 2147483646 : reduced;
+}
+
+/**
+ * Advances the generator state (multiply by 16807, truncate to 32 bits) and
+ * returns the new state minus one as a signed 32-bit value.
+ */
+Random.prototype.next = function () {
+    const advanced = (this._seed * 16807) & 0xffffffff;
+    this._seed = advanced;
+    return (advanced - 1) | 0;
+};
+
+module.exports = Random;

+ 66 - 11
tests/nasm/run.js

@@ -2,8 +2,8 @@
 'use strict';
 
 // Mapping between signals and x86 exceptions:
-// "Program received signal SIGILL, Illegal instruction." -> #UD
-// "Program received signal SIGFPE, Arithmetic exception." -> #GP
+// "Program received signal SIGILL, Illegal instruction." -> #UD (6)
+// "Program received signal SIGFPE, Arithmetic exception." -> #DE (0)
 // to be determined -> #GP
 // to be determined -> #NM
 // to be determined -> #TS
@@ -26,7 +26,7 @@ const TERMINATE_MSG = 'DONE';
 const MASK_ARITH = 1 | 1 << 2 | 1 << 4 | 1 << 6 | 1 << 7 | 1 << 11;
 
 try {
-    var V86 = require('../../build/libv86.js').V86Starter;
+    var V86 = require('../../build/libv86-debug.js').V86Starter;
 }
 catch(e) {
     console.error('Failed to import build/libv86.js. Run ' +
@@ -61,9 +61,10 @@ function h(n, len)
     return "0x" + pad0(str.toUpperCase(), len || 1);
 }
 
-if (cluster.isMaster) {
-
-    function extract_json(name, fixture_text) {
+if(cluster.isMaster)
+{
+    function extract_json(name, fixture_text)
+    {
         if(fixture_text.includes("SIGFPE, Arithmetic exception"))
         {
             return { exception: "DE", };
@@ -150,7 +151,8 @@ if (cluster.isMaster) {
     let failed_tests = [];
     let finished_workers = 0;
 
-    for (let i = 0; i < nr_of_cpus; i++) {
+    for(let i = 0; i < nr_of_cpus; i++)
+    {
         let worker = cluster.fork();
 
         worker.on('message', function(message) {
@@ -188,10 +190,16 @@ if (cluster.isMaster) {
 
                 console.error('\n[-] %s:', test_failure.img_name);
 
-                test_failure.failures.forEach(function(individual_failure) {
-                    console.error("\n\t" + individual_failure.name);
-                    console.error("\tActual: 0x" + (individual_failure.actual >>> 0).toString(16));
-                    console.error("\tExpected: 0x" + (individual_failure.expected >>> 0).toString(16));
+                test_failure.failures.forEach(function(failure) {
+                    function format_value(v) {
+                        if(typeof v === "number")
+                            return "0x" + (v >>> 0).toString(16);
+                        else
+                            return String(v);
+                    }
+                    console.error("\n\t" + failure.name);
+                    console.error("\tActual:   " + format_value(failure.actual));
+                    console.error("\tExpected: " + format_value(failure.expected));
                 });
             });
             process.exit(1);
@@ -207,6 +215,7 @@ else {
             return;
         }
 
+        waiting_for_test = false;
         current_test = test;
         console.info('Testing', test.img_name);
 
@@ -222,10 +231,12 @@ else {
     let loaded = false;
     let current_test = undefined;
     let first_test = undefined;
+    let waiting_for_test = false;
 
     let emulator = new V86({
         autostart: false,
         memory_size: 2 * 1024 * 1024,
+        log_level: 0,
     });
 
     emulator.add_listener("emulator-loaded", function()
@@ -238,7 +249,51 @@ else {
             }
         });
 
+    // Called with the exception vector number n when the emulated CPU raises
+    // an exception. Compares it against the exception recorded in the current
+    // test's fixture and reports the outcome to the master process.
+    // NOTE(review): the meaning of the `true` return value is defined by the
+    // emulator, which is not visible here - confirm.
+    emulator.cpu_exception_hook = function(n)
+    {
+        // The current test has already been reported; ignore further exceptions
+        if(waiting_for_test)
+        {
+            return true;
+        }
+
+        waiting_for_test = true;
+        emulator.stop();
+
+        // Exception vectors the test suite knows how to compare
+        const exceptions = {
+            0: "DE",
+            6: "UD",
+        };
+
+        const exception = exceptions[n];
+
+        // Any other vector aborts the whole worker
+        if(exception === undefined)
+        {
+            console.error("Unexpected CPU exception: " + n);
+            process.exit(1);
+        }
+
+        if(current_test.fixture.exception !== exception)
+        {
+            // Report a mismatch between the raised and the expected exception
+            process.send({
+                failures: [{
+                    name: "Exception",
+                    actual: exception,
+                    expected: current_test.fixture.exception || "(none)",
+                }],
+                img_name: current_test.img_name,
+            });
+        }
+        else
+        {
+            // The fixture expected exactly this exception: test finished
+            process.send(DONE_MSG);
+        }
+
+        return true;
+    };
+
     emulator.bus.register('cpu-event-halt', function() {
+        console.assert(!waiting_for_test);
+        waiting_for_test = true;
         emulator.stop();
         var cpu = emulator.v86.cpu;
 

+ 1 - 1
tests/qemu/Makefile

@@ -2,7 +2,7 @@
 CC=gcc
 CC_I386=$(CC) -m32
 QEMU_INCLUDES += -I../..
-CFLAGS=-Wall -O2 -g -fno-strict-aliasing -static -mmmx -mno-sse
+CFLAGS=-Wall -O2 -g -fno-strict-aliasing -static -mmmx -msse
 LDFLAGS=
 
 

+ 67 - 11
tests/qemu/compiler.h

@@ -1,9 +1,15 @@
-/* public domain */
+/* compiler.h: macros to abstract away compiler specifics
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
 
 #ifndef COMPILER_H
 #define COMPILER_H
 
-#include "config-host.h"
+#if defined __clang_analyzer__ || defined __COVERITY__
+#define QEMU_STATIC_ANALYSIS 1
+#endif
 
 /*----------------------------------------------------------------------------
 | The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.
@@ -18,10 +24,14 @@
 
 #define QEMU_NORETURN __attribute__ ((__noreturn__))
 
-#if QEMU_GNUC_PREREQ(3, 4)
 #define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+
+#define QEMU_SENTINEL __attribute__((sentinel))
+
+#if QEMU_GNUC_PREREQ(4, 3)
+#define QEMU_ARTIFICIAL __attribute__((always_inline, artificial))
 #else
-#define QEMU_WARN_UNUSED_RESULT
+#define QEMU_ARTIFICIAL
 #endif
 
 #if defined(_WIN32)
@@ -30,19 +40,66 @@
 # define QEMU_PACKED __attribute__((packed))
 #endif
 
-#define cat(x,y) x ## y
-#define cat2(x,y) cat(x,y)
-#define QEMU_BUILD_BUG_ON(x) \
-    typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1];
+#define QEMU_ALIGNED(X) __attribute__((aligned(X)))
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s)	tostring(s)
+#define tostring(s)	#s
+#endif
+
+#ifndef likely
+#if __GNUC__ < 3
+#define __builtin_expect(x, n) (x)
+#endif
+
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x)   __builtin_expect(!!(x), 0)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({                      \
+        const typeof(((type *) 0)->member) *__mptr = (ptr);     \
+        (type *) ((char *) __mptr - offsetof(type, member));})
+#endif
+
+/* Convert from a base type to a parent type, with compile time checking.  */
+#ifdef __GNUC__
+#define DO_UPCAST(type, field, dev) ( __extension__ ( { \
+    char __attribute__((unused)) offset_must_be_zero[ \
+        -offsetof(type, field)]; \
+    container_of(dev, type, field);}))
+#else
+#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
+#endif
+
+#define typeof_field(type, field) typeof(((type *)0)->field)
+#define type_check(t1,t2) ((t1*)0 - (t2*)0)
+
+#define QEMU_BUILD_BUG_ON_STRUCT(x) \
+    struct { \
+        int:(x) ? -1 : 1; \
+    }
+
+#if defined(CONFIG_STATIC_ASSERT)
+#define QEMU_BUILD_BUG_ON(x) _Static_assert(!(x), "not expecting: " #x)
+#elif defined(__COUNTER__)
+#define QEMU_BUILD_BUG_ON(x) typedef QEMU_BUILD_BUG_ON_STRUCT(x) \
+    glue(qemu_build_bug_on__, __COUNTER__) __attribute__((unused))
+#else
+#define QEMU_BUILD_BUG_ON(x)
+#endif
+
+#define QEMU_BUILD_BUG_ON_ZERO(x) (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - \
+                                   sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)))
 
 #if defined __GNUC__
 # if !QEMU_GNUC_PREREQ(4, 4)
    /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
-#  define GCC_ATTR __attribute__((__unused__, format(printf, 1, 2)))
 #  define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
 # else
    /* Use gnu_printf when supported (qemu uses standard format strings). */
-#  define GCC_ATTR __attribute__((__unused__, format(gnu_printf, 1, 2)))
 #  define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
 #  if defined(_WIN32)
     /* Map __printf__ to __gnu_printf__ because we want standard format strings
@@ -51,7 +108,6 @@
 #  endif
 # endif
 #else
-#define GCC_ATTR /**/
 #define GCC_FMT_ATTR(n, m)
 #endif
 

+ 5 - 1
tests/qemu/run.js

@@ -56,7 +56,11 @@ emulator.add_listener("serial0-output-char", function(chr)
 
         emulator.read_file("/result", function(err, data)
             {
-                if(err) throw err;
+                if(err)
+                {
+                    console.error("Reading test result failed: " + err);
+                    process.exit(1);
+                }
                 console.error("Got result, writing to stdout");
                 process.stdout.write(new Buffer(data));
                 emulator.stop();

+ 232 - 32
tests/qemu/test-i386.c

@@ -28,6 +28,7 @@
 #include <errno.h>
 #include <sys/ucontext.h>
 #include <sys/mman.h>
+#include <sys/user.h>
 
 #if !defined(__x86_64__)
 //#define TEST_VM86
@@ -65,6 +66,7 @@
 #define glue(x, y) xglue(x, y)
 #define stringify(s)	tostring(s)
 #define tostring(s)	#s
+#define UNUSED(s)	(void)(s)
 
 #define CC_C   	0x0001
 #define CC_P 	0x0004
@@ -211,7 +213,7 @@ static inline long i2l(long v)
 #define TEST_LEA16(STR)\
 {\
     asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
-        : "=wq" (res)\
+        : "=r" (res)\
         : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
     printf("lea %s = %08lx\n", STR, res);\
 }
@@ -338,21 +340,21 @@ void test_lea(void)
     TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)");
 #else
     /* limited 16 bit addressing test */
-    //TEST_LEA16("0x4000");
-    //TEST_LEA16("(%%bx)");
-    //TEST_LEA16("(%%si)");
-    //TEST_LEA16("(%%di)");
-    //TEST_LEA16("0x40(%%bx)");
-    //TEST_LEA16("0x40(%%si)");
-    //TEST_LEA16("0x40(%%di)");
-    //TEST_LEA16("0x4000(%%bx)");
-    //TEST_LEA16("0x4000(%%si)");
-    //TEST_LEA16("(%%bx,%%si)");
-    //TEST_LEA16("(%%bx,%%di)");
-    //TEST_LEA16("0x40(%%bx,%%si)");
-    //TEST_LEA16("0x40(%%bx,%%di)");
-    //TEST_LEA16("0x4000(%%bx,%%si)");
-    //TEST_LEA16("0x4000(%%bx,%%di)");
+    TEST_LEA16("0x4000");
+    TEST_LEA16("(%%bx)");
+    TEST_LEA16("(%%si)");
+    TEST_LEA16("(%%di)");
+    TEST_LEA16("0x40(%%bx)");
+    TEST_LEA16("0x40(%%si)");
+    TEST_LEA16("0x40(%%di)");
+    TEST_LEA16("0x4000(%%bx)");
+    TEST_LEA16("0x4000(%%si)");
+    TEST_LEA16("(%%bx,%%si)");
+    TEST_LEA16("(%%bx,%%di)");
+    TEST_LEA16("0x40(%%bx,%%si)");
+    TEST_LEA16("0x40(%%bx,%%di)");
+    TEST_LEA16("0x4000(%%bx,%%si)");
+    TEST_LEA16("0x4000(%%bx,%%di)");
 #endif
 }
 
@@ -1072,7 +1074,7 @@ void test_fbcd(double a)
 
 void test_fenv(void)
 {
-    struct QEMU_PACKED {
+    struct __attribute__((__packed__)) {
         uint16_t fpuc;
         uint16_t dummy1;
         uint16_t fpus;
@@ -1082,7 +1084,7 @@ void test_fenv(void)
         uint32_t ignored[4];
         long double fpregs[8];
     } float_env32;
-    struct QEMU_PACKED {
+    struct __attribute__((__packed__)) {
         uint16_t fpuc;
         uint16_t fpus;
         uint16_t fptag;
@@ -1267,17 +1269,14 @@ void test_bcd(void)
 #define TEST_CMPXCHG(op, size, opconst, eax)\
 {\
     long op0, op1, op2;\
-    long eflags;\
     op0 = i2l(0x12345678);\
     op1 = i2l(0xfbca7654);\
     op2 = i2l(eax);\
-    asm(#op " %" size "0, %" size "1\n" \
-        "pushf\n" \
-        "pop %2\n" \
-        : "=q" (op0), opconst (op1), "=g" (eflags) \
+    asm(#op " %" size "0, %" size "1" \
+        : "=q" (op0), opconst (op1) \
         : "0" (op0), "a" (op2));\
-    printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX " CC=%02lx\n",\
-           #op, op2, op0, op1, eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
+    printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\
+           #op, op2, op0, op1);\
 }
 
 void test_xchg(void)
@@ -1432,7 +1431,7 @@ void test_segs(void)
     struct {
         uint32_t offset;
         uint16_t seg;
-    } QEMU_PACKED segoff;
+    } __attribute__((__packed__)) segoff;
 
     ldt.entry_number = 1;
     ldt.base_addr = (unsigned long)&seg_data1;
@@ -1877,7 +1876,7 @@ int tab[2];
 
 void sig_handler(int sig, siginfo_t *info, void *puc)
 {
-    struct ucontext *uc = puc;
+    ucontext_t *uc = puc;
 
     printf("si_signo=%d si_errno=%d si_code=%d",
            info->si_signo, info->si_errno, info->si_code);
@@ -1988,7 +1987,7 @@ void test_exceptions(void)
     printf("lock nop exception:\n");
     if (setjmp(jmp_env) == 0) {
         /* now execute an invalid instruction */
-        asm volatile(".byte 0xf0, 0x90"); /* lock nop */
+        asm volatile(".byte 0xf0, 0x90");
     }
 #endif
 
@@ -2076,7 +2075,7 @@ void test_exceptions(void)
 /* specific precise single step test */
 void sig_trap_handler(int sig, siginfo_t *info, void *puc)
 {
-    struct ucontext *uc = puc;
+    ucontext_t *uc = puc;
     printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]);
 }
 
@@ -2958,10 +2957,210 @@ void test_conv(void)
 #endif
 }
 
+
+// Prints msg (or "no message" when msg is NULL) to stderr and aborts.
+// msg is never modified and callers pass string literals, so it is taken as
+// a pointer to const.
+void fatal(const char *msg)
+{
+    fprintf(stderr, "*** FATAL ERROR: %s\n", (msg ? msg : "no message"));
+    fflush(stderr);
+    abort();
+}
+
+// Prints num_bytes bytes of buffer starting at offset as one hex number,
+// highest address first (i.e. in little-endian value order).
+void byte_read(uint8_t* buffer, uint16_t offset, size_t num_bytes)
+{
+    printf("%-12s: offset=%x value=", "byte_r", offset);
+    for(size_t remaining = num_bytes; remaining > 0; remaining--)
+    {
+        printf("%02" PRIx8, buffer[offset + remaining - 1]);
+    }
+    printf("\n");
+}
+
+// State of the deterministic test-pattern generator
+uint64_t seq_counter = 0x8070605040302010;
+
+// Increments every byte of seq_counter by one and returns the new value.
+uint64_t get_seq64()
+{
+    const uint64_t step = 0x0101010101010101;
+    seq_counter = seq_counter + step;
+    return seq_counter;
+}
+
+// Fills num_bytes bytes of target starting at offset, highest address first,
+// with the low byte of successive get_seq64() values, and prints the bytes
+// written in that same order.
+void byte_write_seq(uint8_t* target, uint16_t offset, size_t num_bytes)
+{
+    printf("%-12s: offset=%x value=", "byte_w", offset);
+    for(size_t remaining = num_bytes; remaining > 0; remaining--)
+    {
+        const uint8_t value = get_seq64();
+        target[offset + remaining - 1] = value;
+        printf("%02" PRIx8, value);
+    }
+    printf("\n");
+}
+
+// Defines chunk_read<BITS>(): loads a BITS-wide value from addr + offset
+// using INSTR into a register of constraint class CONSTR and prints it.
+// The "memory" clobber is needed because the load covers more bytes than the
+// one-byte "m" operand describes to the compiler.
+#define GENERATE_CHUNK_READ(INSTR, BITS, CONSTR)                    \
+    void chunk_read ## BITS(uint8_t* addr, uint16_t offset)         \
+    {                                                               \
+        uint ## BITS ## _t chunk = 0;                               \
+        asm volatile(INSTR " %1, %0" :                              \
+                     "=" CONSTR (chunk) :                           \
+                     "m" (*(addr + offset)), "0" (chunk) :          \
+                     "memory");                                     \
+        printf("%-12s: offset=%x value=%" PRIx ## BITS "\n",        \
+               "chunk" #BITS "_r",                                  \
+               offset,                                              \
+               chunk);                                              \
+    }
+
+// Defines chunk_write<BITS>(): stores the next get_seq64() value, truncated
+// to BITS bits, to addr + offset using INSTR and prints the value written.
+// The destination is declared as an output operand because the instruction
+// writes to it; "memory" covers the bytes beyond the one-byte operand.
+#define GENERATE_CHUNK_WRITE(INSTR, BITS, CONSTR)                   \
+    void chunk_write ## BITS(uint8_t* addr, uint16_t offset)        \
+    {                                                               \
+        uint ## BITS ## _t chunk = get_seq64();                     \
+        asm volatile(INSTR " %1, %0" :                              \
+                     "=m" (*(addr + offset)) :                      \
+                     CONSTR (chunk) :                               \
+                     "memory");                                     \
+        printf("%-12s: offset=%x value=%" PRIx ## BITS "\n",        \
+               "chunk" #BITS "_w",                                  \
+               offset,                                              \
+               chunk);                                              \
+    }
+
+// Defines both chunk_read<BITS>() and chunk_write<BITS>()
+#define GENERATE_CHUNK_FNS(INSTR, BITS, CONSTR)                   \
+    GENERATE_CHUNK_READ(INSTR, BITS, CONSTR)                      \
+    GENERATE_CHUNK_WRITE(INSTR, BITS, CONSTR)
+
+// Write a known byte pattern, then read it back as one BITS-wide chunk
+#define TEST_CHUNK_READ(BITS, ADDR, OFFSET)             \
+    do {                                                \
+        byte_write_seq(ADDR, OFFSET, (BITS) >> 3);      \
+        chunk_read ## BITS(ADDR, OFFSET);               \
+    } while(0)
+
+// Write one BITS-wide chunk, then read it back byte by byte
+#define TEST_CHUNK_WRITE(BITS, ADDR, OFFSET)            \
+    do {                                                \
+        chunk_write ## BITS(ADDR, OFFSET);              \
+        byte_read(ADDR, OFFSET, (BITS) >> 3);           \
+    } while(0)
+
+// Write a known byte pattern, modify it in place with a BITS-wide
+// read-modify-write instruction, then read the result back byte by byte
+#define TEST_CHUNK_READ_WRITE(BITS, ADDR, OFFSET)       \
+    do {                                                \
+        byte_write_seq(ADDR, OFFSET, (BITS) >> 3);      \
+        chunk_read_write ## BITS(ADDR, OFFSET);         \
+        byte_read(ADDR, OFFSET, (BITS) >> 3);           \
+    } while(0)
+
+// Based on BITS, we calculate the offset where cross-page reads/writes would begin
+#define TEST_CROSS_PAGE(BITS, ADDR)                                     \
+    for(size_t offset = (PAGE_SIZE + 1 - ((BITS) >> 3));                \
+        offset < PAGE_SIZE; offset++)                                   \
+    {                                                                   \
+        TEST_CHUNK_READ(BITS, ADDR, offset);                            \
+        TEST_CHUNK_WRITE(BITS, ADDR, offset);                           \
+        TEST_CHUNK_READ_WRITE(BITS, ADDR, offset);                      \
+    }
+
+GENERATE_CHUNK_FNS("movw", 16, "r")
+GENERATE_CHUNK_FNS("mov", 32, "r")
+
+// The 16 and 32-bit read-modify-write helpers are used by
+// test_page_boundaries() regardless of TEST_SSE, so they must be defined
+// outside of the TEST_SSE conditional.
+
+// Adds the next get_seq64() value, truncated to 16 bits, to the 16-bit word
+// at addr + offset and prints the addend. The memory operand is both read and
+// written, hence "+m"; "memory" covers the bytes beyond the one-byte operand.
+void chunk_read_write16(uint8_t* addr, uint16_t offset)
+{
+    uint16_t chunk = get_seq64();
+    asm volatile("addw %1, %0" :
+                 "+m" (*(addr + offset)) :
+                 "r" (chunk) :
+                 "memory", "cc");
+    printf("%-12s: offset=%x value=%" PRIx16 "\n",
+           "chunk16_rw",
+           offset,
+           chunk);
+}
+
+// 32-bit variant of chunk_read_write16()
+void chunk_read_write32(uint8_t* addr, uint16_t offset)
+{
+    uint32_t chunk = get_seq64();
+    asm volatile("add %1, %0" :
+                 "+m" (*(addr + offset)) :
+                 "r" (chunk) :
+                 "memory", "cc");
+    printf("%-12s: offset=%x value=%" PRIx32 "\n",
+           "chunk32_rw",
+           offset,
+           chunk);
+}
+
+#ifdef TEST_SSE
+GENERATE_CHUNK_FNS("movq", 64, "y")
+
+// No 64 or 128-bit read-write x86 instructions support a memory address as the destination
+void chunk_read_write64(uint8_t* addr, uint16_t offset)
+{
+    UNUSED(addr);
+    UNUSED(offset);
+}
+
+void chunk_read_write128(uint8_t* addr, uint16_t offset)
+{
+    UNUSED(addr);
+    UNUSED(offset);
+}
+
+// Loads 16 bytes from addr + offset with an unaligned SSE move and prints
+// them as one 128-bit hex value (upper quadword first).
+void chunk_read128(uint8_t* addr, uint16_t offset)
+{
+    XMMReg chunk;
+    chunk.q[0] = chunk.q[1] = 0;
+    // "memory": the load covers more bytes than the one-byte "m" operand
+    // describes to the compiler
+    asm volatile("movdqu %1, %0" :
+                 "=x" (chunk.dq) :
+                 "m" (*(addr + offset)), "0" (chunk.dq) :
+                 "memory"
+        );
+    printf("%-12s: offset=%x value=" FMT64X FMT64X "\n",
+           "chunk128_r",
+           offset,
+           chunk.q[1],
+           chunk.q[0]);
+}
+
+// Stores two successive get_seq64() values as 16 bytes at addr + offset with
+// an unaligned SSE move and prints them as one 128-bit hex value (upper
+// quadword first).
+void chunk_write128(uint8_t* addr, uint16_t offset)
+{
+    XMMReg chunk;
+    chunk.q[0] = get_seq64();
+    chunk.q[1] = get_seq64();
+    // The destination is written by the instruction, so it is an output
+    // operand; "memory" covers the bytes beyond the one-byte operand
+    asm volatile("movdqu %1, %0" :
+                 "=m" (*(addr + offset)) :
+                 "x" (chunk.dq) :
+                 "memory"
+        );
+    printf("%-12s: offset=%x value=" FMT64X FMT64X "\n",
+           "chunk128_w",
+           offset,
+           chunk.q[1],
+           chunk.q[0]);
+}
+#endif
+
+// Exercises 16 and 32-bit (and, with TEST_SSE, 64 and 128-bit) reads, writes
+// and read-modify-writes that straddle the boundary between two virtually
+// consecutive pages.
+void test_page_boundaries()
+{
+    // Reserve 2 consecutive virtual pages in a single mapping, so that the
+    // MAP_FIXED remap of the second page below can only replace memory that
+    // this test owns (MAP_FIXED discards whatever was mapped at that address)
+    uint8_t *const page0 = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if(page0 == MAP_FAILED)
+    {
+        fatal("mmap");
+    }
+
+    // throwaway mmap to reduce likelihood of page0 and page1 mapping to consecutive physical frames
+    uint8_t *const throwaway = mmap(NULL, PAGE_SIZE, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+    uint8_t *const page1 = mmap(page0 + PAGE_SIZE, PAGE_SIZE,
+                                 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+
+    if(throwaway == MAP_FAILED || page1 == MAP_FAILED)
+    {
+        fatal("mmap");
+    }
+
+    // Trigger page-faults causing virtual pages to be allocated to physical frames
+    page0[0] = 0x42;
+    throwaway[0] = 0x42;
+    page1[0] = 0x42;
+
+    TEST_CROSS_PAGE(16, page0);
+    TEST_CROSS_PAGE(32, page0);
+#ifdef TEST_SSE
+    TEST_CROSS_PAGE(64, page0);
+    TEST_CROSS_PAGE(128, page0);
+#endif
+
+    munmap(throwaway, PAGE_SIZE);
+    munmap(page0, PAGE_SIZE);
+    munmap(page1, PAGE_SIZE);
+}
+
 extern void *__start_initcall;
 extern void *__stop_initcall;
 
-
 int main(int argc, char **argv)
 {
     void **ptr;
@@ -2993,7 +3192,9 @@ int main(int argc, char **argv)
     test_vm86();
 #endif
 #if !defined(__x86_64__)
+    test_exceptions();
     test_self_modifying_code();
+    //test_single_step();
 #endif
     test_enter();
     test_conv();
@@ -3001,7 +3202,6 @@ int main(int argc, char **argv)
     test_sse();
     //test_fxsave();
 #endif
-    test_exceptions();
-    //test_single_step();
+    test_page_boundaries();
     return 0;
 }

+ 6 - 2
v86.css

@@ -57,9 +57,13 @@ a {
     width: 0;
     height: 0;
     resize: none;
-    border: 0;
-    padding: 0;
     position: absolute;
+    opacity: 0;
+    left: -9999em;
+    top: 0;
+    z-index: -10;
+    white-space: nowrap;
+    overflow: hidden;
 }
 h4 {
     margin: 0px 0px 20px 0px;

Some files were not shown because too many files changed in this diff