123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626 |
- #!/usr/bin/env node
- "use strict";
- const fs = require("fs");
- const path = require("path");
- const encodings = require("./x86_table");
- const c_ast = require("./c_ast");
- const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
// C statement appended to a generated instruction body to flag it as nonfaulting.
const APPEND_NONFAULTING_FLAG = "instr_flags |= JIT_INSTR_NONFAULTING_FLAG;";

// Output directory: value of --output-dir, or "../build" relative to this script.
const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
mkdirpSync(OUT_DIR);

// Table selection: either a single table via --table <name>, or everything via --all.
const table_arg = get_switch_value("--table");
const gen_all = get_switch_exist("--all");

const to_generate = {};
for(const table_name of ["jit", "jit0f_16", "jit0f_32"])
{
    to_generate[table_name] = gen_all || table_arg === table_name;
}

// At least one table should have been requested on the command line.
console.assert(
    Object.values(to_generate).some(Boolean),
    "Pass --table [jit|jit0f_16|jit0f_32] or --all to pick which tables to generate"
);

// Function declarations below are hoisted, so the generator can be started here.
gen_table();
/**
 * Choose the C expression that reads an instruction's immediate operand.
 *
 * The immediate kinds are checked in a fixed order: imm8, imm8s, immaddr,
 * then the 16/32-bit kinds (imm16, imm1632, imm32).
 *
 * @param {Object} op Encoding entry; its imm8, imm8s, imm16, imm1632, imm32,
 *     immaddr, os and opcode properties are consulted.
 * @param {number|undefined} size_variant Operand size of the variant being
 *     generated; used when the encoding has `os` set or an odd opcode.
 * @returns {string|undefined} A `read_*()` call expression, or undefined when
 *     the encoding declares no immediate.
 */
function gen_read_imm_call(op, size_variant)
{
    if(op.imm8)
    {
        return "read_imm8()";
    }

    if(op.imm8s)
    {
        return "read_imm8s()";
    }

    const has_wide_immediate = op.imm16 || op.imm1632 || op.imm32;

    if(!has_wide_immediate && !op.immaddr)
    {
        return undefined;
    }

    if(op.immaddr)
    {
        // immaddr: depends on address size
        return "read_moffs()";
    }

    // Encodings without `os` and with an even opcode are treated as 8-bit here.
    const size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;

    if(op.imm16 || (op.imm1632 && size === 16))
    {
        return "read_imm16()";
    }

    console.assert(op.imm32 || (op.imm1632 && size === 32));
    return "read_imm32s()";
}
/**
 * Render a C call statement, e.g. `name(a, b);`.
 *
 * @param {string} name Function name to call.
 * @param {Array} [args] Argument expressions; a missing or falsy value means no arguments.
 * @returns {string} The call statement including the trailing semicolon.
 */
function gen_call(name, args)
{
    const argument_list = (args || []).join(", ");
    return `${name}(${argument_list});`;
}
/**
 * Render a call to `gen_fn<N>_const`, where N is the number of arguments
 * in `args`. The quoted target name and its length are passed first,
 * followed by the arguments themselves.
 *
 * @param {string} name Name of the function the generated code should call.
 * @param {Array} [args] Argument expressions forwarded to the target.
 * @returns {string} The C call statement.
 */
function gen_codegen_call(name, args)
{
    const forwarded = args || [];
    const quoted_name = `"${name}"`;
    const call_args = [quoted_name, name.length].concat(forwarded);

    return gen_call(`gen_fn${forwarded.length}_const`, call_args);
}
/**
 * Render the pair of statements used for a memory operand: first
 * `gen_modrm_resolve(modrm_byte);`, then a `gen_modrm_fn<N>` call carrying the
 * quoted target name, its length and the N forwarded arguments.
 *
 * @param {string} name Name of the function the generated code should call.
 * @param {Array} [args] Argument expressions forwarded to the target; not modified.
 * @returns {string} Both statements joined by a single space.
 */
function gen_codegen_call_modrm(name, args)
{
    const forwarded = (args || []).slice();
    const call_args = [`"${name}"`, name.length].concat(forwarded);

    const resolve_statement = gen_call("gen_modrm_resolve", ["modrm_byte"]);
    const call_statement = gen_call(`gen_modrm_fn${forwarded.length}`, call_args);

    return `${resolve_statement} ${call_statement}`;
}
/**
 * Render a direct call to the `_jit` variant of an instruction handler.
 *
 * @param {string} name Instruction handler name; `_jit` is appended.
 * @param {Array} [args] Argument expressions.
 * @returns {string} The C call statement.
 */
function gen_custom_jit_call(name, args)
{
    const jit_handler = `${name}_jit`;
    return gen_call(jit_handler, args);
}
/**
 * Build an if-else node that dispatches on the modrm byte: values below 0xC0
 * take the memory form (`<name>_mem`), everything else the register form
 * (`<name>_reg`).
 *
 * @param {string} name Instruction name without the `_mem`/`_reg` suffix.
 * @param {{mem_call_fn: Function, reg_call_fn: Function}} gen_call_fns
 *     Emitters invoked as `fn(suffixed_name, args)` for each form.
 * @param {Array} mem_args Arguments for the memory form.
 * @param {Array} reg_args Arguments for the register form.
 * @param {{mem_postfix?: Array, reg_postfix?: Array}} [postfixes] Extra
 *     statements appended after the respective call; both default to none.
 * @returns {Object} An `if-else` syntax tree node.
 */
function gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, postfixes={})
{
    // Pull the emitters out first so they are invoked as plain functions.
    const emit_mem = gen_call_fns.mem_call_fn;
    const emit_reg = gen_call_fns.reg_call_fn;

    const mem_postfix = postfixes.mem_postfix === undefined ? [] : postfixes.mem_postfix;
    const reg_postfix = postfixes.reg_postfix === undefined ? [] : postfixes.reg_postfix;

    const memory_branch = {
        condition: "modrm_byte < 0xC0",
        body: [emit_mem(`${name}_mem`, mem_args)].concat(mem_postfix),
    };
    const register_branch = {
        body: [emit_reg(`${name}_reg`, reg_args)].concat(reg_postfix),
    };

    return {
        type: "if-else",
        if_blocks: [memory_branch],
        else_block: register_branch,
    };
}
/*
 * Current naming scheme:
 * instr(16|32|)_((66|F2|F3)?0F)?<two hex digits of the opcode>(_[0-7])?(_mem|_reg|)
 *
 * This function produces everything up to and including the optional `_<fixed_g>`
 * part; the `_mem` / `_reg` suffix is appended by the callers.
 */
/**
 * Compose the handler name for an encoding.
 *
 * @param {Object} encoding Encoding entry; `os`, `opcode` and `fixed_g` are read.
 * @param {number|undefined} size Operand size, used only when `encoding.os` is set.
 * @param {number} [prefix_variant] Prefix byte (e.g. 0x66) to embed in the
 *     name; omitted or undefined for the unprefixed variant.
 * @returns {string} The instruction handler name.
 */
function make_instruction_name(encoding, size, prefix_variant)
{
    let name = "instr";

    if(encoding.os)
    {
        name += String(size);
    }
    name += "_";

    if(prefix_variant !== undefined)
    {
        name += hex(prefix_variant, 2);
    }

    // Two-byte opcodes are recognised by 0x0F in the second-lowest byte
    if((encoding.opcode & 0xFF00) === 0x0F00)
    {
        name += "0F";
    }
    name += hex(encoding.opcode & 0xFF, 2);

    if(encoding.fixed_g !== undefined)
    {
        name += `_${encoding.fixed_g}`;
    }

    return name;
}
/**
 * Decide where the nonfaulting flag statement goes for a modrm instruction.
 *
 * Normally only the register form gets the flag. LEA (opcode 0x8D) is the
 * exception: it doesn't actually access memory, so its memory form is the one
 * flagged instead.
 *
 * @param {Object} encoding Encoding entry; `nonfaulting` and `opcode` are read.
 * @returns {{mem_postfix: string[], reg_postfix: string[]}} Statements to
 *     append after the memory and register calls respectively.
 */
function get_nonfaulting_mem_reg_postfix(encoding)
{
    const postfixes = {
        mem_postfix: [],
        reg_postfix: [],
    };

    if(encoding.nonfaulting)
    {
        const is_lea = encoding.opcode === 0x8D;
        const flagged_form = is_lea ? postfixes.mem_postfix : postfixes.reg_postfix;
        flagged_form.push(APPEND_NONFAULTING_FLAG);
    }

    return postfixes;
}
/**
 * Report which prefix bytes (0x66, 0xF2, 0xF3) occur among a group of
 * encodings. The prefix is read from bits 16 and up of each `opcode`.
 *
 * @param {Object[]} encodings Encodings sharing one opcode.
 * @returns {{has_66: boolean, has_F2: boolean, has_F3: boolean}}
 */
function find_prefix_variants(encodings)
{
    const has_prefix = prefix => encodings.some(e => (e.opcode >>> 16) === prefix);

    return {
        has_66: has_prefix(0x66),
        has_F2: has_prefix(0xF2),
        has_F3: has_prefix(0xF3),
    };
}

/**
 * Build the if-blocks that dispatch to the prefixed variants of an
 * instruction, one per prefix that is present, in the order 66, F2, F3.
 *
 * @param {Object} encoding Encoding used to derive the instruction name.
 * @param {number|undefined} size Operand size passed on to the name builder.
 * @param {{has_66: boolean, has_F2: boolean, has_F3: boolean}} prefix_variants
 * @param {{mem_call_fn: Function, reg_call_fn: Function}} gen_call_fns
 * @param {Array} mem_args Arguments of the memory form.
 * @param {Array} reg_args Arguments of the register form.
 * @returns {Object[]} Entries of the shape `{ condition, body }`.
 */
function gen_prefix_if_blocks(encoding, size, prefix_variants, gen_call_fns, mem_args, reg_args)
{
    const candidates = [
        { present: prefix_variants.has_66, prefix: 0x66, condition: "prefixes_ & PREFIX_66" },
        { present: prefix_variants.has_F2, prefix: 0xF2, condition: "prefixes_ & PREFIX_F2" },
        { present: prefix_variants.has_F3, prefix: 0xF3, condition: "prefixes_ & PREFIX_F3" },
    ];

    return candidates
        .filter(candidate => candidate.present)
        .map(candidate => {
            const name = make_instruction_name(encoding, size, candidate.prefix);
            const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
            return { condition: candidate.condition, body, };
        });
}

/**
 * Generate the syntax tree (an array of C statement strings and
 * `switch` / `if-else` nodes) for the body of one opcode case.
 *
 * The shape of the body depends on the first encoding of the group:
 *  - `fixed_g` set: read a modrm byte and switch on its middle three bits
 *  - a 66/F2/F3 variant exists: read a modrm byte and dispatch on the prefix
 *  - `e` set: read a modrm byte and split into memory / register forms
 *  - `prefix` or `custom`: call the `_jit` handler directly
 *  - otherwise: plain call with optional immediates
 *
 * @param {Object[]} encodings All encodings sharing this opcode; must be non-empty.
 * @param {number|undefined} size Operand size (16 or 32), or undefined when
 *     the opcode has no size-specific variants.
 * @returns {Array} Statements and nodes making up the case body.
 */
function gen_instruction_body(encodings, size)
{
    const BLOCK_BOUNDARY_STATEMENT = "instr_flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;";

    const encoding = encodings[0];

    const prefix_variants = find_prefix_variants(encodings);
    const has_prefix_variant = prefix_variants.has_66 || prefix_variants.has_F2 || prefix_variants.has_F3;

    if(has_prefix_variant)
    {
        console.assert((encoding.opcode & 0xFF00) === 0x0F00);
        // Leaving unsupported because:
        // 1. Instructions that use these prefixes are usually faulting
        // 2. It would need a refactor to allow us to pass the correct prefixed encoding object to
        //    where the nonfaulting flags are set
        console.assert(
            !encodings.some(e => e.nonfaulting),
            "Unsupported: instruction with 66/f2/f3 prefix marked as nonfaulting. Opcode: 0x" + hex(encoding.opcode)
        );
    }

    const instruction_postfix = encoding.block_boundary ? [BLOCK_BOUNDARY_STATEMENT] : [];

    // Default emitters; individual branches substitute their own where needed
    const gen_call_fns = {
        mem_call_fn: gen_codegen_call_modrm,
        reg_call_fn: gen_codegen_call,
    };

    if(encoding.fixed_g !== undefined)
    {
        // instruction with modrm byte where the middle 3 bits encode the instruction

        // group by opcode without prefix plus middle bits of modrm byte
        const cases_by_key = Object.create(null);
        for(const case_ of encodings)
        {
            console.assert(typeof case_.fixed_g === "number");
            cases_by_key[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
        }
        const cases = Object.values(cases_by_key).sort((e1, e2) => e1.fixed_g - e2.fixed_g);

        return [
            "int32_t modrm_byte = read_imm8();",
            {
                type: "switch",
                condition: "modrm_byte >> 3 & 7",
                cases: cases.map(case_ => {
                    const fixed_g = case_.fixed_g;
                    let instruction_name = make_instruction_name(case_, size, undefined);
                    const case_postfix = case_.block_boundary ? [BLOCK_BOUNDARY_STATEMENT] : [];

                    const mem_args = [];
                    const reg_args = ["modrm_byte & 7"];

                    const imm_read = gen_read_imm_call(case_, size);
                    if(imm_read)
                    {
                        mem_args.push(imm_read);
                        reg_args.push(imm_read);
                    }

                    // Emitters are chosen per case. Overwriting the shared
                    // gen_call_fns here would leak the custom emitters into every
                    // later case of this opcode group.
                    let case_call_fns = gen_call_fns;

                    if(case_.custom)
                    {
                        console.assert(!case_.nonfaulting, "Unsupported: custom fixed_g instruction as nonfaulting");
                        instruction_name += "_jit";
                        mem_args.push("modrm_byte");
                        case_call_fns = {
                            mem_call_fn: gen_call,
                            reg_call_fn: gen_call,
                        };
                    }

                    if(has_prefix_variant)
                    {
                        const if_blocks = gen_prefix_if_blocks(
                            case_, size, prefix_variants, case_call_fns, mem_args, reg_args
                        );
                        const else_block = {
                            body: [
                                gen_modrm_mem_reg_split(
                                    instruction_name,
                                    case_call_fns,
                                    mem_args,
                                    reg_args,
                                    {}
                                )
                            ],
                        };

                        return {
                            conditions: [fixed_g],
                            body: [
                                "int32_t prefixes_ = *prefixes;",
                                {
                                    type: "if-else",
                                    if_blocks,
                                    else_block,
                                },
                            ].concat(case_postfix),
                        };
                    }

                    const body = [
                        gen_modrm_mem_reg_split(
                            instruction_name,
                            case_call_fns,
                            mem_args,
                            reg_args,
                            get_nonfaulting_mem_reg_postfix(case_)
                        )
                    ].concat(case_postfix);

                    return {
                        conditions: [fixed_g],
                        body,
                    };
                }),

                default_case: {
                    body: [
                        "assert(false);",
                        gen_codegen_call("trigger_ud"),
                    ],
                }
            },
        ].concat(instruction_postfix);
    }

    if(has_prefix_variant)
    {
        // instruction with modrm byte (middle 3 bits encode a register) that
        // has 66/F2/F3 prefixed variants
        console.assert(encoding.e);
        console.assert(!encoding.ignore_mod);

        const imm_read = gen_read_imm_call(encoding, size);

        const mem_args = ["modrm_byte >> 3 & 7"];
        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];

        if(imm_read)
        {
            mem_args.push(imm_read);
            reg_args.push(imm_read);
        }

        const if_blocks = gen_prefix_if_blocks(
            encoding, size, prefix_variants, gen_call_fns, mem_args, reg_args
        );
        const else_block = {
            body: [
                gen_modrm_mem_reg_split(
                    make_instruction_name(encoding, size),
                    gen_call_fns,
                    mem_args,
                    reg_args,
                    {}
                )
            ],
        };

        return [
            "int32_t modrm_byte = read_imm8();",
            "int32_t prefixes_ = *prefixes;",
            {
                type: "if-else",
                if_blocks,
                else_block,
            }
        ].concat(instruction_postfix);
    }

    if(encoding.e)
    {
        // instruction with modrm byte where the middle 3 bits encode a register
        console.assert(encodings.length === 1);

        const instruction_name = make_instruction_name(encoding, size);
        const imm_read = gen_read_imm_call(encoding, size);

        if(encoding.ignore_mod)
        {
            console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

            // Has modrm byte, but the 2 mod bits are ignored and both
            // operands are always registers (0f20-0f24)

            if(encoding.nonfaulting)
            {
                instruction_postfix.push(APPEND_NONFAULTING_FLAG);
            }

            return [
                "int32_t modrm_byte = read_imm8();",
                gen_codegen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
            ].concat(instruction_postfix);
        }

        if(encoding.opcode === 0x8D) // lea
        {
            const mem_args = ["modrm_byte"];
            const reg_args = ["0", "0"];

            // Only the memory form goes through the custom _jit handler
            const lea_call_fns = {
                mem_call_fn: gen_custom_jit_call,
                reg_call_fn: gen_call_fns.reg_call_fn,
            };

            return [
                "int32_t modrm_byte = read_imm8();",
                gen_modrm_mem_reg_split(
                    instruction_name,
                    lea_call_fns,
                    mem_args,
                    reg_args,
                    get_nonfaulting_mem_reg_postfix(encoding)
                ),
            ].concat(instruction_postfix);
        }

        const mem_args = ["modrm_byte >> 3 & 7"];
        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];

        if(imm_read)
        {
            mem_args.push(imm_read);
            reg_args.push(imm_read);
        }

        let call_fns = gen_call_fns;

        if(encoding.custom)
        {
            // The default mem_call_fn adds a modrm_resolve call, but since we override it,
            // we also need to pass it in to our custom function to resolve it however it wishes
            mem_args.unshift("modrm_byte");
            call_fns = {
                mem_call_fn: gen_custom_jit_call,
                reg_call_fn: gen_custom_jit_call,
            };
        }

        return [
            "int32_t modrm_byte = read_imm8();",
            gen_modrm_mem_reg_split(
                instruction_name,
                call_fns,
                mem_args,
                reg_args,
                get_nonfaulting_mem_reg_postfix(encoding)
            ),
        ].concat(instruction_postfix);
    }

    if(encoding.prefix || encoding.custom)
    {
        console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");

        const instruction_name = make_instruction_name(encoding, size) + "_jit";
        const imm_read = gen_read_imm_call(encoding, size);
        const args = [];

        if(imm_read)
        {
            args.push(imm_read);
        }

        // Prefix calls can add to the return flags
        const call_prefix = encoding.prefix ? "instr_flags |= " : "";
        return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix);
    }

    // instruction without modrm byte or prefix
    const imm_read = gen_read_imm_call(encoding, size);
    const instruction_name = make_instruction_name(encoding, size);

    const args = [];

    if(imm_read)
    {
        args.push(imm_read);
    }

    if(encoding.extra_imm16)
    {
        console.assert(imm_read);
        args.push("read_imm16()");
    }
    else if(encoding.extra_imm8)
    {
        console.assert(imm_read);
        args.push("read_imm8()");
    }

    if(encoding.nonfaulting)
    {
        instruction_postfix.push(APPEND_NONFAULTING_FLAG);
    }

    return [gen_codegen_call(instruction_name, args)].concat(instruction_postfix);
}
/**
 * Build the opcode dispatch tables and write the selected ones to OUT_DIR.
 *
 * Encodings are bucketed into one-byte opcodes and 0F-prefixed opcodes (keyed
 * by their low byte). Three tables are produced:
 *  - "jit": one-byte opcodes; size-specific opcodes get a 16-bit case
 *    (`0xNN`) and a 32-bit case (`0xNN|0x100`)
 *  - "jit0f_16" / "jit0f_32": 0F-prefixed opcodes, one table per operand size
 * Only the tables enabled in `to_generate` are written.
 */
function gen_table()
{
    const by_opcode = Object.create(null);
    const by_opcode0f = Object.create(null);

    for(const entry of encodings)
    {
        const full_opcode = entry.opcode;
        let bucket;
        let key;

        if(full_opcode >= 0x100)
        {
            // Multi-byte opcodes are only collected when 0F-prefixed
            if((full_opcode & 0xFF00) !== 0x0F00)
            {
                continue;
            }
            bucket = by_opcode0f;
            key = full_opcode & 0xFF;
        }
        else
        {
            bucket = by_opcode;
            key = full_opcode;
        }

        bucket[key] = bucket[key] || [];
        bucket[key].push(entry);
    }

    // Wraps a list of cases in a switch over `opcode`
    const make_opcode_switch = switch_cases => ({
        type: "switch",
        condition: "opcode",
        cases: switch_cases,
        default_case: {
            body: ["assert(false);"]
        },
    });

    // Prints a table and hands it to finalize_table under the given name
    const write_table = (name, table) => {
        finalize_table(
            OUT_DIR,
            name,
            c_ast.print_syntax_tree([table]).join("\n") + "\n"
        );
    };

    const cases = [];
    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const variants = by_opcode[opcode];
        console.assert(variants && variants.length);

        const label = `0x${hex(opcode, 2)}`;

        if(variants[0].os)
        {
            cases.push({
                conditions: [label],
                body: gen_instruction_body(variants, 16),
            });
            cases.push({
                conditions: [`${label}|0x100`],
                body: gen_instruction_body(variants, 32),
            });
        }
        else
        {
            cases.push({
                conditions: [label, `${label}|0x100`],
                body: gen_instruction_body(variants, undefined),
            });
        }
    }

    // The one-byte table is written before the 0F tables are generated
    if(to_generate.jit)
    {
        write_table("jit", make_opcode_switch(cases));
    }

    const cases0f_16 = [];
    const cases0f_32 = [];
    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const variants = by_opcode0f[opcode];
        console.assert(variants && variants.length);

        const label = `0x${hex(opcode, 2)}`;

        if(variants[0].os)
        {
            cases0f_16.push({
                conditions: [label],
                body: gen_instruction_body(variants, 16),
            });
            cases0f_32.push({
                conditions: [label],
                body: gen_instruction_body(variants, 32),
            });
        }
        else
        {
            // Size-independent opcodes share one case object between both tables
            const shared_case = {
                conditions: [label],
                body: gen_instruction_body(variants, undefined),
            };
            cases0f_16.push(shared_case);
            cases0f_32.push(shared_case);
        }
    }

    const table0f_16 = make_opcode_switch(cases0f_16);
    const table0f_32 = make_opcode_switch(cases0f_32);

    if(to_generate.jit0f_16)
    {
        write_table("jit0f_16", table0f_16);
    }

    if(to_generate.jit0f_32)
    {
        write_table("jit0f_32", table0f_32);
    }
}
|