generate_jit.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. #!/usr/bin/env node
  2. "use strict";
  3. const fs = require("fs");
  4. const path = require("path");
  5. const encodings = require("./x86_table");
  6. const c_ast = require("./c_ast");
  7. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
  8. const APPEND_NONFAULTING_FLAG = "instr_flags |= JIT_INSTR_NONFAULTING_FLAG;";
  9. const OUT_DIR = get_switch_value("--output-dir") ||
  10. path.join(__dirname, "..", "build");
  11. mkdirpSync(OUT_DIR);
  12. const table_arg = get_switch_value("--table");
  13. const gen_all = get_switch_exist("--all");
  14. const to_generate = {
  15. jit: gen_all || table_arg === "jit",
  16. jit0f_16: gen_all || table_arg === "jit0f_16",
  17. jit0f_32: gen_all || table_arg === "jit0f_32",
  18. };
  19. console.assert(
  20. Object.keys(to_generate).some(k => to_generate[k]),
  21. "Pass --table [jit|jit0f_16|jit0f_32] or --all to pick which tables to generate"
  22. );
  23. gen_table();
  24. function gen_read_imm_call(op, size_variant)
  25. {
  26. let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
  27. if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
  28. {
  29. if(op.imm8)
  30. {
  31. return "read_imm8()";
  32. }
  33. else if(op.imm8s)
  34. {
  35. return "read_imm8s()";
  36. }
  37. else
  38. {
  39. if(op.immaddr)
  40. {
  41. // immaddr: depends on address size
  42. return "read_moffs()";
  43. }
  44. else
  45. {
  46. console.assert(op.imm1632 || op.imm16 || op.imm32);
  47. if(op.imm1632 && size === 16 || op.imm16)
  48. {
  49. return "read_imm16()";
  50. }
  51. else
  52. {
  53. console.assert(op.imm1632 && size === 32 || op.imm32);
  54. return "read_imm32s()";
  55. }
  56. }
  57. }
  58. }
  59. else
  60. {
  61. return undefined;
  62. }
  63. }
  64. function gen_call(name, args)
  65. {
  66. args = args || [];
  67. return `${name}(${args.join(", ")});`;
  68. }
  69. function gen_codegen_call(name, args)
  70. {
  71. args = args || [];
  72. const args_count = args.length;
  73. args = [].concat([`"${name}"`, name.length], args);
  74. return gen_call(`gen_fn${args_count}_const`, args);
  75. }
  76. function gen_codegen_call_modrm(name, args)
  77. {
  78. args = (args || []).slice();
  79. const args_count = args.length;
  80. args = [].concat([`"${name}"`, name.length], args);
  81. return [
  82. gen_call(`gen_modrm_resolve`, ["modrm_byte"]),
  83. gen_call(`gen_modrm_fn${args_count}`, args),
  84. ].join(" ");
  85. }
  86. function gen_custom_jit_call(name, args)
  87. {
  88. return gen_call(`${name}_jit`, args);
  89. }
  90. function gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, postfixes={})
  91. {
  92. const { mem_call_fn, reg_call_fn } = gen_call_fns;
  93. const { mem_postfix=[], reg_postfix=[] } = postfixes;
  94. return {
  95. type: "if-else",
  96. if_blocks: [{
  97. condition: "modrm_byte < 0xC0",
  98. body: []
  99. .concat([mem_call_fn(`${name}_mem`, mem_args)])
  100. .concat(mem_postfix),
  101. }],
  102. else_block: {
  103. body: [
  104. reg_call_fn(`${name}_reg`, reg_args)
  105. ].concat(reg_postfix),
  106. },
  107. };
  108. }
  109. /*
  110. * Current naming scheme:
  111. * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
  112. */
  113. function make_instruction_name(encoding, size, prefix_variant)
  114. {
  115. const suffix = encoding.os ? String(size) : "";
  116. const opcode_hex = hex(encoding.opcode & 0xFF, 2);
  117. const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
  118. const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
  119. const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
  120. return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
  121. }
  122. function get_nonfaulting_mem_reg_postfix(encoding)
  123. {
  124. const lea_special_case = encoding.opcode === 0x8D;
  125. // In general only reg_postfixes will append the nonfaulting flag, except in the special case
  126. // for LEA - it doesn't actually access memory, so the memory variant can be nonfaulting
  127. const mem_postfix = (encoding.nonfaulting && lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
  128. const reg_postfix = (encoding.nonfaulting && !lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
  129. return {
  130. mem_postfix,
  131. reg_postfix,
  132. };
  133. }
  134. function gen_instruction_body(encodings, size)
  135. {
  136. const encoding = encodings[0];
  137. let has_66 = false;
  138. let has_F2 = false;
  139. let has_F3 = false;
  140. for(let e of encodings)
  141. {
  142. if((e.opcode >>> 16) === 0x66) has_66 = true;
  143. if((e.opcode >>> 16) === 0xF2) has_F2 = true;
  144. if((e.opcode >>> 16) === 0xF3) has_F3 = true;
  145. }
  146. if(has_66 || has_F2 || has_F3)
  147. {
  148. console.assert((encoding.opcode & 0xFF00) === 0x0F00);
  149. // Leaving unsupported because:
  150. // 1. Instructions that use these prefixes are usually faulting
  151. // 2. It would need a refactor to allow us to pass the correct prefixed encoding object to
  152. // where the nonfaulting flags are set
  153. console.assert(
  154. !encodings.some(e => e.nonfaulting),
  155. "Unsupported: instruction with 66/f2/f3 prefix marked as nonfaulting. Opcode: 0x" + hex(encoding.opcode)
  156. );
  157. }
  158. const instruction_postfix = encoding.block_boundary ? ["instr_flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
  159. // May be overridden for custom encodings
  160. const gen_call_fns = {
  161. mem_call_fn: gen_codegen_call_modrm,
  162. reg_call_fn: gen_codegen_call,
  163. };
  164. if(encoding.fixed_g !== undefined)
  165. {
  166. // instruction with modrm byte where the middle 3 bits encode the instruction
  167. // group by opcode without prefix plus middle bits of modrm byte
  168. let cases = encodings.reduce((cases_by_opcode, case_) => {
  169. console.assert(typeof case_.fixed_g === "number");
  170. cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
  171. return cases_by_opcode;
  172. }, Object.create(null));
  173. cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
  174. return [
  175. "int32_t modrm_byte = read_imm8();",
  176. {
  177. type: "switch",
  178. condition: "modrm_byte >> 3 & 7",
  179. cases: cases.map(case_ => {
  180. const fixed_g = case_.fixed_g;
  181. let instruction_name = make_instruction_name(case_, size, undefined);
  182. const instruction_postfix = case_.block_boundary ? ["instr_flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
  183. const mem_args = [];
  184. const reg_args = ["modrm_byte & 7"];
  185. const imm_read = gen_read_imm_call(case_, size);
  186. if(imm_read)
  187. {
  188. mem_args.push(imm_read);
  189. reg_args.push(imm_read);
  190. }
  191. if(case_.custom)
  192. {
  193. console.assert(!case_.nonfaulting, "Unsupported: custom fixed_g instruction as nonfaulting");
  194. instruction_name += "_jit";
  195. mem_args.push("modrm_byte");
  196. gen_call_fns.mem_call_fn = gen_call;
  197. gen_call_fns.reg_call_fn = gen_call;
  198. }
  199. if(has_66 || has_F2 || has_F3)
  200. {
  201. const if_blocks = [];
  202. if(has_66) {
  203. const name = make_instruction_name(case_, size, 0x66);
  204. const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
  205. if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
  206. }
  207. if(has_F2) {
  208. const name = make_instruction_name(case_, size, 0xF2);
  209. const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
  210. if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
  211. }
  212. if(has_F3) {
  213. const name = make_instruction_name(case_, size, 0xF3);
  214. const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
  215. if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
  216. }
  217. const else_block = {
  218. body: [
  219. gen_modrm_mem_reg_split(
  220. instruction_name,
  221. gen_call_fns,
  222. mem_args,
  223. reg_args,
  224. {}
  225. )
  226. ],
  227. };
  228. return {
  229. conditions: [fixed_g],
  230. body: [
  231. "int32_t prefixes_ = *prefixes;",
  232. {
  233. type: "if-else",
  234. if_blocks,
  235. else_block,
  236. },
  237. ].concat(instruction_postfix),
  238. };
  239. }
  240. else
  241. {
  242. const body = [
  243. gen_modrm_mem_reg_split(
  244. instruction_name,
  245. gen_call_fns,
  246. mem_args,
  247. reg_args,
  248. get_nonfaulting_mem_reg_postfix(case_)
  249. )
  250. ].concat(instruction_postfix);
  251. return {
  252. conditions: [fixed_g],
  253. body,
  254. };
  255. }
  256. }),
  257. default_case: {
  258. body: [
  259. "assert(false);",
  260. gen_codegen_call("trigger_ud"),
  261. ],
  262. }
  263. },
  264. ].concat(instruction_postfix);
  265. }
  266. else if(has_66 || has_F2 || has_F3)
  267. {
  268. // instruction without modrm byte but with prefix
  269. console.assert(encoding.e);
  270. console.assert(!encoding.ignore_mod);
  271. const imm_read = gen_read_imm_call(encoding, size);
  272. const mem_args = ["modrm_byte >> 3 & 7"];
  273. const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
  274. if(imm_read)
  275. {
  276. mem_args.push(imm_read);
  277. reg_args.push(imm_read);
  278. }
  279. const if_blocks = [];
  280. if(has_66) {
  281. const name = make_instruction_name(encoding, size, 0x66);
  282. const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
  283. if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
  284. }
  285. if(has_F2) {
  286. const name = make_instruction_name(encoding, size, 0xF2);
  287. const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
  288. if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
  289. }
  290. if(has_F3) {
  291. const name = make_instruction_name(encoding, size, 0xF3);
  292. const body = [gen_modrm_mem_reg_split(name, gen_call_fns, mem_args, reg_args, {})];
  293. if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
  294. }
  295. const else_block = {
  296. body: [
  297. gen_modrm_mem_reg_split(
  298. make_instruction_name(encoding, size),
  299. gen_call_fns,
  300. mem_args,
  301. reg_args,
  302. {}
  303. )
  304. ],
  305. };
  306. return [
  307. "int32_t modrm_byte = read_imm8();",
  308. "int32_t prefixes_ = *prefixes;",
  309. {
  310. type: "if-else",
  311. if_blocks,
  312. else_block,
  313. }
  314. ].concat(instruction_postfix);
  315. }
  316. else if(encoding.fixed_g === undefined && encoding.e)
  317. {
  318. // instruction with modrm byte where the middle 3 bits encode a register
  319. console.assert(encodings.length === 1);
  320. const instruction_name = make_instruction_name(encoding, size);
  321. const imm_read = gen_read_imm_call(encoding, size);
  322. if(encoding.ignore_mod)
  323. {
  324. console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
  325. // Has modrm byte, but the 2 mod bits are ignored and both
  326. // operands are always registers (0f20-0f24)
  327. if(encoding.nonfaulting)
  328. {
  329. instruction_postfix.push(APPEND_NONFAULTING_FLAG);
  330. }
  331. return [
  332. "int32_t modrm_byte = read_imm8();",
  333. gen_codegen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
  334. ].concat(instruction_postfix);
  335. }
  336. else if(encoding.opcode === 0x8D) // lea
  337. {
  338. const mem_args = ["modrm_byte"];
  339. const reg_args = ["0", "0"];
  340. gen_call_fns.mem_call_fn = gen_custom_jit_call;
  341. return [
  342. "int32_t modrm_byte = read_imm8();",
  343. gen_modrm_mem_reg_split(
  344. instruction_name,
  345. gen_call_fns,
  346. mem_args,
  347. reg_args,
  348. get_nonfaulting_mem_reg_postfix(encoding)
  349. ),
  350. ].concat(instruction_postfix);
  351. }
  352. else
  353. {
  354. const mem_args = ["modrm_byte >> 3 & 7"];
  355. const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
  356. if(imm_read)
  357. {
  358. mem_args.push(imm_read);
  359. reg_args.push(imm_read);
  360. }
  361. if(encoding.custom)
  362. {
  363. // The default mem_call_fn adds a modrm_resolve call, but since we override it,
  364. // we also need to pass it in to our custom function to resolve it however it wishes
  365. mem_args.unshift("modrm_byte");
  366. gen_call_fns.mem_call_fn = gen_custom_jit_call;
  367. gen_call_fns.reg_call_fn = gen_custom_jit_call;
  368. }
  369. return [
  370. "int32_t modrm_byte = read_imm8();",
  371. gen_modrm_mem_reg_split(
  372. instruction_name,
  373. gen_call_fns,
  374. mem_args,
  375. reg_args,
  376. get_nonfaulting_mem_reg_postfix(encoding)
  377. ),
  378. ].concat(instruction_postfix);
  379. }
  380. }
  381. else if(encoding.prefix || encoding.custom)
  382. {
  383. console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");
  384. const instruction_name = make_instruction_name(encoding, size) + "_jit";
  385. const imm_read = gen_read_imm_call(encoding, size);
  386. const args = [];
  387. if(imm_read)
  388. {
  389. args.push(imm_read);
  390. }
  391. const call_prefix = encoding.prefix ? "instr_flags |= " : "";
  392. // Prefix calls can add to the return flags
  393. return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix);
  394. }
  395. else
  396. {
  397. // instruction without modrm byte or prefix
  398. const imm_read = gen_read_imm_call(encoding, size);
  399. const instruction_name = make_instruction_name(encoding, size);
  400. const args = [];
  401. if(imm_read)
  402. {
  403. args.push(imm_read);
  404. }
  405. if(encoding.extra_imm16)
  406. {
  407. console.assert(imm_read);
  408. args.push("read_imm16()");
  409. }
  410. else if(encoding.extra_imm8)
  411. {
  412. console.assert(imm_read);
  413. args.push("read_imm8()");
  414. }
  415. if(encoding.nonfaulting)
  416. {
  417. instruction_postfix.push(APPEND_NONFAULTING_FLAG);
  418. }
  419. return [gen_codegen_call(instruction_name, args)].concat(instruction_postfix);
  420. }
  421. }
  422. function gen_table()
  423. {
  424. let by_opcode = Object.create(null);
  425. let by_opcode0f = Object.create(null);
  426. for(let o of encodings)
  427. {
  428. let opcode = o.opcode;
  429. if(opcode >= 0x100)
  430. {
  431. if((opcode & 0xFF00) === 0x0F00)
  432. {
  433. opcode &= 0xFF;
  434. by_opcode0f[opcode] = by_opcode0f[opcode] || [];
  435. by_opcode0f[opcode].push(o);
  436. }
  437. }
  438. else
  439. {
  440. by_opcode[opcode] = by_opcode[opcode] || [];
  441. by_opcode[opcode].push(o);
  442. }
  443. }
  444. let cases = [];
  445. for(let opcode = 0; opcode < 0x100; opcode++)
  446. {
  447. let encoding = by_opcode[opcode];
  448. console.assert(encoding && encoding.length);
  449. let opcode_hex = hex(opcode, 2);
  450. if(encoding[0].os)
  451. {
  452. cases.push({
  453. conditions: [`0x${opcode_hex}`],
  454. body: gen_instruction_body(encoding, 16),
  455. });
  456. cases.push({
  457. conditions: [`0x${opcode_hex}|0x100`],
  458. body: gen_instruction_body(encoding, 32),
  459. });
  460. }
  461. else
  462. {
  463. cases.push({
  464. conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
  465. body: gen_instruction_body(encoding, undefined),
  466. });
  467. }
  468. }
  469. const table = {
  470. type: "switch",
  471. condition: "opcode",
  472. cases,
  473. default_case: {
  474. body: ["assert(false);"]
  475. },
  476. };
  477. if(to_generate.jit)
  478. {
  479. finalize_table(
  480. OUT_DIR,
  481. "jit",
  482. c_ast.print_syntax_tree([table]).join("\n") + "\n"
  483. );
  484. }
  485. const cases0f_16 = [];
  486. const cases0f_32 = [];
  487. for(let opcode = 0; opcode < 0x100; opcode++)
  488. {
  489. let encoding = by_opcode0f[opcode];
  490. console.assert(encoding && encoding.length);
  491. let opcode_hex = hex(opcode, 2);
  492. if(encoding[0].os)
  493. {
  494. cases0f_16.push({
  495. conditions: [`0x${opcode_hex}`],
  496. body: gen_instruction_body(encoding, 16),
  497. });
  498. cases0f_32.push({
  499. conditions: [`0x${opcode_hex}`],
  500. body: gen_instruction_body(encoding, 32),
  501. });
  502. }
  503. else
  504. {
  505. let block = {
  506. conditions: [`0x${opcode_hex}`],
  507. body: gen_instruction_body(encoding, undefined),
  508. };
  509. cases0f_16.push(block);
  510. cases0f_32.push(block);
  511. }
  512. }
  513. const table0f_16 = {
  514. type: "switch",
  515. condition: "opcode",
  516. cases: cases0f_16,
  517. default_case: {
  518. body: ["assert(false);"]
  519. },
  520. };
  521. const table0f_32 = {
  522. type: "switch",
  523. condition: "opcode",
  524. cases: cases0f_32,
  525. default_case: {
  526. body: ["assert(false);"]
  527. },
  528. };
  529. if(to_generate.jit0f_16)
  530. {
  531. finalize_table(
  532. OUT_DIR,
  533. "jit0f_16",
  534. c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n"
  535. );
  536. }
  537. if(to_generate.jit0f_32)
  538. {
  539. finalize_table(
  540. OUT_DIR,
  541. "jit0f_32",
  542. c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n"
  543. );
  544. }
  545. }