#!/usr/bin/env node "use strict"; const fs = require("fs"); const path = require("path"); const encodings = require("./x86_table"); const c_ast = require("./c_ast"); const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util"); const APPEND_NONFAULTING_FLAG = "analysis.flags |= JIT_INSTR_NONFAULTING_FLAG;"; const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build"); mkdirpSync(OUT_DIR); const table_arg = get_switch_value("--table"); const gen_all = get_switch_exist("--all"); const to_generate = { analyzer: gen_all || table_arg === "analyzer", analyzer0f_16: gen_all || table_arg === "analyzer0f_16", analyzer0f_32: gen_all || table_arg === "analyzer0f_32", }; console.assert( Object.keys(to_generate).some(k => to_generate[k]), "Pass --table [analyzer|analyzer0f_16|analyzer0f_32] or --all to pick which tables to generate" ); gen_table(); function gen_read_imm_call(op, size_variant) { let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8; if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr) { if(op.imm8) { return "read_imm8()"; } else if(op.imm8s) { return "read_imm8s()"; } else { if(op.immaddr) { // immaddr: depends on address size return "read_moffs()"; } else { console.assert(op.imm1632 || op.imm16 || op.imm32); if(op.imm1632 && size === 16 || op.imm16) { return "read_imm16()"; } else { console.assert(op.imm1632 && size === 32 || op.imm32); return "read_imm32s()"; } } } } else { return undefined; } } function gen_call(name, args) { args = args || []; return `${name}(${args.join(", ")});`; } function gen_codegen_call(args) { return args.map(arg => arg + ";"); } function gen_codegen_call_modrm(args) { args = args.map(arg => arg + ";"); return [].concat(gen_call("modrm_skip", ["modrm_byte"]), args); } function gen_modrm_mem_reg_split(mem_args, reg_args, postfixes={}) { const { mem_postfix=[], reg_postfix=[] } = postfixes; return { type: "if-else", if_blocks: [{ condition: "modrm_byte < 0xC0", body: [] .concat(gen_codegen_call_modrm(mem_args)) .concat(mem_postfix), }], else_block: { body: gen_codegen_call(reg_args).concat(reg_postfix), }, }; } /* * Current naming scheme: * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|) */ function make_instruction_name(encoding, size, prefix_variant) { const suffix = encoding.os ? String(size) : ""; const opcode_hex = hex(encoding.opcode & 0xFF, 2); const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : ""; const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2); const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`; return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`; } function get_nonfaulting_mem_reg_postfix(encoding) { const lea_special_case = encoding.opcode === 0x8D; const mem_postfix = (encoding.nonfaulting && lea_special_case) ? [APPEND_NONFAULTING_FLAG] : []; const reg_postfix = (encoding.nonfaulting && !lea_special_case) ? [APPEND_NONFAULTING_FLAG] : []; return { mem_postfix, reg_postfix, }; } function create_instruction_postfix(encoding) { return [].concat( encoding.block_boundary ? ["analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [], encoding.no_next_instruction ? ["analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;"] : [] ); } function gen_instruction_body(encodings, size) { const encoding = encodings[0]; let has_66 = false; let has_F2 = false; let has_F3 = false; for(let e of encodings) { if((e.opcode >>> 16) === 0x66) has_66 = true; if((e.opcode >>> 16) === 0xF2) has_F2 = true; if((e.opcode >>> 16) === 0xF3) has_F3 = true; } if(has_66 || has_F2 || has_F3) { console.assert((encoding.opcode & 0xFF00) === 0x0F00); } if(encoding.fixed_g !== undefined) { // instruction with modrm byte where the middle 3 bits encode the instruction // group by opcode without prefix plus middle bits of modrm byte let cases = encodings.reduce((cases_by_opcode, case_) => { console.assert(typeof case_.fixed_g === "number"); cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_; return cases_by_opcode; }, Object.create(null)); cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g); return [ "int32_t modrm_byte = read_imm8();", { type: "switch", condition: "modrm_byte >> 3 & 7", cases: cases.map(case_ => { const fixed_g = case_.fixed_g; const instruction_postfix = create_instruction_postfix(case_); const mem_args = []; const reg_args = []; const imm_read = gen_read_imm_call(case_, size); if(imm_read) { mem_args.push(imm_read); reg_args.push(imm_read); } if(has_66 || has_F2 || has_F3) { const if_blocks = []; if(has_66) { const name = make_instruction_name(case_, size, 0x66); const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})]; if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, }); } if(has_F2) { const name = make_instruction_name(case_, size, 0xF2); const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})]; if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, }); } if(has_F3) { const name = make_instruction_name(case_, size, 0xF3); const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})]; if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, }); } const else_block = { body: [ gen_modrm_mem_reg_split( mem_args, reg_args, {} ) ], }; return { conditions: [fixed_g], body: [ "int32_t prefixes_ = *prefixes;", { type: "if-else", if_blocks, else_block, }, ].concat(instruction_postfix), }; } else { const body = [ gen_modrm_mem_reg_split( mem_args, reg_args, get_nonfaulting_mem_reg_postfix(case_) ) ].concat(instruction_postfix); return { conditions: [fixed_g], body, }; } }), default_case: { body: [ "assert(false);", "analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;", "analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;", ], } }, ]; } else if(has_66 || has_F2 || has_F3) { // instruction without modrm byte but with prefix console.assert(encoding.e); console.assert(!encoding.ignore_mod); const instruction_postfix = create_instruction_postfix(encoding); const imm_read = gen_read_imm_call(encoding, size); const mem_args = []; const reg_args = []; if(imm_read) { mem_args.push(imm_read); reg_args.push(imm_read); } const if_blocks = []; if(has_66) { const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})]; if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, }); } if(has_F2) { const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})]; if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, }); } if(has_F3) { const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})]; if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, }); } const else_block = { body: [ gen_modrm_mem_reg_split( mem_args, reg_args, {} ) ], }; return [ "int32_t modrm_byte = read_imm8();", "int32_t prefixes_ = *prefixes;", { type: "if-else", if_blocks, else_block, } ].concat(instruction_postfix); } else if(encoding.fixed_g === undefined && encoding.e) { // instruction with modrm byte where the middle 3 bits encode a register console.assert(encodings.length === 1); const instruction_postfix = create_instruction_postfix(encoding); const imm_read = gen_read_imm_call(encoding, size); if(encoding.ignore_mod) { console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)"); // Has modrm byte, but the 2 mod bits are ignored and both // operands are always registers (0f20-0f24) if(encoding.nonfaulting) { instruction_postfix.push(APPEND_NONFAULTING_FLAG); } return ["int32_t modrm_byte = read_imm8();"] .concat(gen_codegen_call([])) .concat(instruction_postfix); } else { const mem_args = []; const reg_args = []; if(imm_read) { mem_args.push(imm_read); reg_args.push(imm_read); } return [ "int32_t modrm_byte = read_imm8();", gen_modrm_mem_reg_split( mem_args, reg_args, get_nonfaulting_mem_reg_postfix(encoding) ), ].concat(instruction_postfix); } } else if(encoding.prefix) { console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting."); const instruction_postfix = create_instruction_postfix(encoding); const instruction_name = make_instruction_name(encoding, size) + "_analyze"; const imm_read = gen_read_imm_call(encoding, size); const args = []; if(imm_read) { args.push(imm_read); } const call_prefix = encoding.prefix ? "return " : ""; // Prefix calls can add to the return flags return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix); } else { // instruction without modrm byte or prefix const instruction_postfix = create_instruction_postfix(encoding); const imm_read = gen_read_imm_call(encoding, size); const args = []; if(imm_read) { if(encoding.jump_offset_imm) { args.push("int32_t jump_offset = " + imm_read + ";"); args.push("analysis.jump_offset = jump_offset;"); args.push("analysis.flags |= is_osize_32() ? JIT_INSTR_IMM_JUMP32_FLAG : JIT_INSTR_IMM_JUMP16_FLAG;"); } else { args.push(imm_read + ";"); } } if(encoding.extra_imm16) { console.assert(imm_read); args.push(gen_call("read_imm16")); } else if(encoding.extra_imm8) { console.assert(imm_read); args.push(gen_call("read_imm8")); } if(encoding.nonfaulting) { instruction_postfix.push(APPEND_NONFAULTING_FLAG); } if(encoding.conditional_jump) { console.assert((encoding.opcode & ~0xF) === 0x70 || (encoding.opcode & ~0xF) === 0x0F80); instruction_postfix.push("analysis.condition_index = " + (encoding.opcode & 0xF) + ";"); } return args.concat(instruction_postfix); } } function gen_table() { let by_opcode = Object.create(null); let by_opcode0f = Object.create(null); for(let o of encodings) { let opcode = o.opcode; if(opcode >= 0x100) { if((opcode & 0xFF00) === 0x0F00) { opcode &= 0xFF; by_opcode0f[opcode] = by_opcode0f[opcode] || []; by_opcode0f[opcode].push(o); } } else { by_opcode[opcode] = by_opcode[opcode] || []; by_opcode[opcode].push(o); } } let cases = []; for(let opcode = 0; opcode < 0x100; opcode++) { let encoding = by_opcode[opcode]; console.assert(encoding && encoding.length); let opcode_hex = hex(opcode, 2); if(encoding[0].os) { cases.push({ conditions: [`0x${opcode_hex}`], body: gen_instruction_body(encoding, 16), }); cases.push({ conditions: [`0x${opcode_hex}|0x100`], body: gen_instruction_body(encoding, 32), }); } else { cases.push({ conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`], body: gen_instruction_body(encoding, undefined), }); } } const table = { type: "switch", condition: "opcode", cases, default_case: { body: ["assert(false);"] }, }; if(to_generate.analyzer) { finalize_table( OUT_DIR, "analyzer", c_ast.print_syntax_tree([table]).join("\n") + "\n" ); } const cases0f_16 = []; const cases0f_32 = []; for(let opcode = 0; opcode < 0x100; opcode++) { let encoding = by_opcode0f[opcode]; console.assert(encoding && encoding.length); let opcode_hex = hex(opcode, 2); if(encoding[0].os) { cases0f_16.push({ conditions: [`0x${opcode_hex}`], body: gen_instruction_body(encoding, 16), }); cases0f_32.push({ conditions: [`0x${opcode_hex}`], body: gen_instruction_body(encoding, 32), }); } else { let block = { conditions: [`0x${opcode_hex}`], body: gen_instruction_body(encoding, undefined), }; cases0f_16.push(block); cases0f_32.push(block); } } const table0f_16 = { type: "switch", condition: "opcode", cases: cases0f_16, default_case: { body: ["assert(false);"] }, }; const table0f_32 = { type: "switch", condition: "opcode", cases: cases0f_32, default_case: { body: ["assert(false);"] }, }; if(to_generate.analyzer0f_16) { finalize_table( OUT_DIR, "analyzer0f_16", c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n" ); } if(to_generate.analyzer0f_32) { finalize_table( OUT_DIR, "analyzer0f_32", c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n" ); } }