generate_jit.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. #!/usr/bin/env node
  2. "use strict";
  3. const assert = require("assert").strict;
  4. const fs = require("fs");
  5. const path = require("path");
  6. const x86_table = require("./x86_table");
  7. const rust_ast = require("./rust_ast");
  8. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
  // Directory (relative to this script) that receives the generated Rust files.
  const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");
  mkdirpSync(OUT_DIR);

  // Command line switches: `--table <name>` names a single table, `--all` asks
  // for every table.
  const table_arg = get_switch_value("--table");
  const gen_all = get_switch_exist("--all");

  // A table is generated when --all was passed or it was named via --table.
  const is_table_selected = table_name => gen_all || table_arg === table_name;

  const to_generate = {
      jit: is_table_selected("jit"),
      jit0f: is_table_selected("jit0f"),
  };

  // Refuse to run when nothing was selected.
  assert(
      Object.values(to_generate).some(selected => selected),
      "Pass --table [jit|jit0f] or --all to pick which tables to generate"
  );

  gen_table();
  /**
   * Choose the Rust expression that reads the immediate operand of an encoding.
   *
   * @param {Object} op - Encoding entry. The flags `imm8`, `imm8s`, `immaddr`,
   *     `imm16`, `imm1632`, `imm32`, `os` and the numeric `opcode` are consulted.
   * @param {number|undefined} size_variant - Operand size currently being
   *     generated; only used for encodings flagged `os` or with an odd opcode.
   * @returns {string|undefined} Source text of the read call, or `undefined`
   *     when none of the immediate flags is set.
   * @throws {AssertionError} When `imm1632` is set but the effective size is
   *     neither 16 nor 32.
   */
  function gen_read_imm_call(op, size_variant)
  {
      if(op.imm8)
      {
          return "ctx.cpu.read_imm8()";
      }

      if(op.imm8s)
      {
          return "ctx.cpu.read_imm8s()";
      }

      if(op.immaddr)
      {
          // immaddr: depends on address size
          return "ctx.cpu.read_moffs()";
      }

      const has_wide_imm = op.imm16 || op.imm1632 || op.imm32;

      if(!has_wide_imm)
      {
          return undefined;
      }

      // Encodings without `os` and with an even opcode are treated as 8-bit.
      const effective_size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;

      if(op.imm16 || (op.imm1632 && effective_size === 16))
      {
          return "ctx.cpu.read_imm16()";
      }

      assert(op.imm32 || (op.imm1632 && effective_size === 32));
      return "ctx.cpu.read_imm32()";
  }
  /**
   * Render a call statement as source text.
   *
   * @param {string} name - Callee, inserted verbatim.
   * @param {string[]} [args] - Argument expressions; a missing or falsy value
   *     is treated as an empty list.
   * @returns {string} `name(arg1, arg2, ...);`
   */
  function gen_call(name, args)
  {
      const rendered_args = (args || []).join(", ");
      return `${name}(${rendered_args});`;
  }
  /*
   * Current naming scheme:
   * instr(16|32|)_(66|F2|F3)?0F?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
   *
   * Builds the part of that name up to and including the optional fixed_g
   * digit; any _mem/_reg suffix is appended by the callers.
   *
   * encoding: table entry; `opcode`, `os` and `fixed_g` are read.
   * size: operand size, appended after "instr" only when `encoding.os` is set.
   *
   * Asserts that opcode bits 8-15 are empty, 0F, F2 or F3 and that bits 16-23
   * are empty, 66, F2 or F3.
   */
  function make_instruction_name(encoding, size)
  {
      const opcode = encoding.opcode;

      // Hex text of the byte found `shift` bits up, or "" when that byte is zero.
      const optional_byte = shift =>
      {
          const value = opcode >> shift & 0xFF;
          return value === 0 ? "" : hex(value, 2);
      };

      const size_suffix = encoding.os ? String(size) : "";
      const low_byte = hex(opcode & 0xFF, 2);
      const middle_byte = optional_byte(8);
      const high_byte = optional_byte(16);
      const group_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;

      assert(["", "0F", "F2", "F3"].includes(middle_byte));
      assert(["", "66", "F2", "F3"].includes(high_byte));

      return `instr${size_suffix}_${high_byte}${middle_byte}${low_byte}${group_suffix}`;
  }
  /**
   * Build the syntax-tree body for all encodings that share one opcode byte.
   *
   * Encodings are split by prefix byte stored in the opcode value: 66 in bits
   * 16-23, F2/F3 in bits 8-15 or 16-23. When prefixed variants exist, the
   * result is an "if-else" node that tests `ctx.cpu.prefixes` for each prefix
   * that occurs and falls back to the unprefixed encodings; otherwise all
   * encodings go to gen_instruction_body_after_prefix directly.
   *
   * @param {Object[]} encodings - Non-empty list of table entries for the opcode.
   * @param {number|undefined} size - Operand size, forwarded unchanged.
   * @returns {Array} Statements (strings) and nodes forming the case body.
   * @throws {AssertionError} If `encodings` is empty, if the prefix layout is
   *     inconsistent with the first encoding, or if prefixed encodings exist
   *     without an unprefixed encoding for the fallback branch.
   */
  function gen_instruction_body(encodings, size)
  {
      assert(encodings.length > 0, "gen_instruction_body: expected at least one encoding");

      const encoding = encodings[0];

      const has_66 = [];
      const has_F2 = [];
      const has_F3 = [];
      const no_prefix = [];

      for(const e of encodings)
      {
          if((e.opcode >>> 16) === 0x66) has_66.push(e);
          else if((e.opcode >>> 8 & 0xFF) === 0xF2 || (e.opcode >>> 16) === 0xF2) has_F2.push(e);
          else if((e.opcode >>> 8 & 0xFF) === 0xF3 || (e.opcode >>> 16) === 0xF3) has_F3.push(e);
          else no_prefix.push(e);
      }

      if(has_F2.length || has_F3.length)
      {
          assert((encoding.opcode & 0xFF0000) === 0 || (encoding.opcode & 0xFF00) === 0x0F00);
      }

      if(has_66.length)
      {
          assert((encoding.opcode & 0xFF00) === 0x0F00);
      }

      const code = [];

      if(encoding.e)
      {
          // The modrm byte is read once, before any prefix dispatch.
          code.push("let modrm_byte = ctx.cpu.read_imm8();");
      }

      if(!has_66.length && !has_F2.length && !has_F3.length)
      {
          return [].concat(
              code,
              gen_instruction_body_after_prefix(encodings, size)
          );
      }

      // The fallback branch is generated from the unprefixed encodings, so fail
      // with a clear message instead of crashing further down on an empty list.
      assert(
          no_prefix.length > 0,
          `Opcode 0x${encoding.opcode.toString(16)} has prefixed encodings but no unprefixed encoding for the else branch`
      );

      const prefixed_groups = [
          ["ctx.cpu.prefixes & ::prefix::PREFIX_66 != 0", has_66],
          ["ctx.cpu.prefixes & ::prefix::PREFIX_F2 != 0", has_F2],
          ["ctx.cpu.prefixes & ::prefix::PREFIX_F3 != 0", has_F3],
      ];

      const if_blocks = [];

      for(const [condition, variants] of prefixed_groups)
      {
          if(variants.length)
          {
              const body = gen_instruction_body_after_prefix(variants, size);
              if_blocks.push({ condition, body, });
          }
      }

      const else_block = {
          body: gen_instruction_body_after_prefix(no_prefix, size),
      };

      return [].concat(
          code,
          {
              type: "if-else",
              if_blocks,
              else_block,
          }
      );
  }
  /**
   * Build the body for encodings that already share the same prefix class.
   *
   * Without `fixed_g` exactly one encoding is expected and its body is returned
   * directly. With `fixed_g` the encodings form a group selected by the middle
   * three bits of the modrm byte, and a "switch" node over those bits is
   * returned whose default case triggers #UD and marks a block boundary.
   *
   * @param {Object[]} encodings - Table entries for one opcode and prefix class.
   * @param {number|undefined} size - Operand size, forwarded unchanged.
   * @returns {Array} Statements and nodes for the body.
   */
  function gen_instruction_body_after_prefix(encodings, size)
  {
      const first = encodings[0];

      if(first.fixed_g === undefined)
      {
          assert(encodings.length === 1);
          return gen_instruction_body_after_fixed_g(first, size);
      }

      assert(first.e);

      // instruction with modrm byte where the middle 3 bits encode the instruction
      // group by opcode without prefix plus middle bits of modrm byte;
      // a later entry with the same key replaces an earlier one
      const grouped = Object.create(null);

      for(const candidate of encodings)
      {
          assert(typeof candidate.fixed_g === "number");
          const key = candidate.opcode & 0xFFFF | candidate.fixed_g << 16;
          grouped[key] = candidate;
      }

      const ordered = Object.values(grouped).sort((left, right) => left.fixed_g - right.fixed_g);

      const switch_cases = ordered.map(member => {
          const group_index = member.fixed_g;
          return {
              conditions: [group_index],
              body: gen_instruction_body_after_fixed_g(member, size),
          };
      });

      const undefined_opcode_body = [].concat(
          gen_call(`::codegen::gen_trigger_ud`, ["ctx"]),
          "*instr_flags |= ::jit::JIT_INSTR_BLOCK_BOUNDARY_FLAG;"
      );

      return [
          {
              type: "switch",
              condition: "modrm_byte >> 3 & 7",
              cases: switch_cases,
              default_case: {
                  body: undefined_opcode_body,
              }
          },
      ];
  }
  /**
   * Build the body for one concrete encoding.
   *
   * The emitted statements depend on the encoding's flags:
   *  - `e` (has modrm byte): an "if-else" node on `modrm_byte < 0xC0` with a
   *    memory and a register form, or a single call when `ignore_mod` is set.
   *  - `custom` / `prefix`: calls into `::jit_instructions::<name>..._jit`.
   *  - otherwise: a `::codegen::gen_fnN_const` / `gen_modrm_fnN` call naming the
   *    instruction, wrapped in the move-registers-to-memory / back-to-locals calls.
   *
   * @param {Object} encoding - Table entry describing the instruction.
   * @param {number|undefined} size - Operand size, forwarded to the helpers
   *     that pick the immediate read and the instruction name.
   * @returns {Array} Statements (strings) and nodes for the instruction.
   */
  function gen_instruction_body_after_fixed_g(encoding, size)
  {
      const BLOCK_BOUNDARY = "*instr_flags |= ::jit::JIT_INSTR_BLOCK_BOUNDARY_FLAG;";
      const MODRM_RM = "(modrm_byte & 7) as u32";
      const MODRM_REG = "(modrm_byte >> 3 & 7) as u32";

      // Statements placed before / after the instruction-specific part.
      const before = [];
      const after = [];

      if(encoding.block_boundary || (!encoding.custom && encoding.e))
      {
          after.push(BLOCK_BOUNDARY);
      }

      if(encoding.sse)
      {
          before.push(gen_call("::codegen::gen_task_switch_test_mmx", ["ctx"]));
      }
      else if(encoding.task_switch_test)
      {
          before.push(gen_call("::codegen::gen_task_switch_test", ["ctx"]));
      }

      const imm_read = gen_read_imm_call(encoding, size);
      const imm_bindings = imm_read ? [`let imm = ${imm_read} as u32;`] : [];

      const instruction_name = make_instruction_name(encoding, size);

      if(!encoding.prefix && !encoding.custom)
      {
          // Only instructions that are neither prefixes nor custom get the
          // register spill before and the reload after the call.
          before.push(gen_call("::codegen::gen_move_registers_from_locals_to_memory", ["ctx"]));
          after.push(gen_call("::codegen::gen_move_registers_from_memory_to_locals", ["ctx"]));
      }

      if(!encoding.e)
      {
          if(encoding.prefix || encoding.custom)
          {
              // custom, but not modrm
              const custom_args = ["ctx"];

              if(imm_read)
              {
                  custom_args.push("imm");
              }

              if(encoding.prefix)
              {
                  custom_args.push("instr_flags");
              }

              return [].concat(
                  before,
                  imm_bindings,
                  gen_call(`::jit_instructions::${instruction_name}_jit`, custom_args),
                  after
              );
          }

          // instruction without modrm byte or prefix
          const plain_args = ["ctx.builder", `"${instruction_name}"`];

          if(imm_read)
          {
              plain_args.push("imm");
          }

          let second_imm_binding;

          if(encoding.extra_imm16)
          {
              second_imm_binding = `let imm2 = ctx.cpu.read_imm16() as u32;`;
          }
          else if(encoding.extra_imm8)
          {
              second_imm_binding = `let imm2 = ctx.cpu.read_imm8() as u32;`;
          }

          if(second_imm_binding)
          {
              // A second immediate is only valid after a first one.
              assert(imm_read);
              imm_bindings.push(second_imm_binding);
              plain_args.push("imm2");
          }

          return [].concat(
              before,
              imm_bindings,
              gen_call(`::codegen::gen_fn${plain_args.length - 2}_const`, plain_args),
              after
          );
      }

      // From here on the instruction has a modrm byte.
      const mem_tail = encoding.mem_ud ? [BLOCK_BOUNDARY] : [];
      const reg_tail = encoding.reg_ud ? [BLOCK_BOUNDARY] : [];

      if(encoding.ignore_mod)
      {
          assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

          // Has modrm byte, but the 2 mod bits are ignored and both
          // operands are always registers (0f20-0f24)
          const register_args = ["ctx.builder", `"${instruction_name}"`, MODRM_RM, MODRM_REG];

          return [].concat(
              before,
              gen_call(`::codegen::gen_fn${register_args.length - 2}_const`, register_args),
              reg_tail,
              after
          );
      }

      // Arguments appended to both the memory and the register form.
      const shared_args = [];

      if(encoding.fixed_g === undefined)
      {
          shared_args.push(MODRM_REG);
      }

      if(imm_read)
      {
          shared_args.push("imm");
      }

      const decode_address = "let addr = ::modrm::decode(ctx.cpu, modrm_byte);";

      let mem_body;
      let reg_body;

      if(encoding.custom)
      {
          const mem_args = ["ctx", "addr"].concat(shared_args);
          const reg_args = ["ctx", MODRM_RM].concat(shared_args);

          mem_body = [].concat(
              decode_address,
              imm_bindings,
              gen_call(`::jit_instructions::${instruction_name}_mem_jit`, mem_args),
              mem_tail
          );
          reg_body = [].concat(
              imm_bindings,
              gen_call(`::jit_instructions::${instruction_name}_reg_jit`, reg_args),
              reg_tail
          );
      }
      else
      {
          const mem_args = ["ctx.builder", `"${instruction_name}_mem"`].concat(shared_args);
          const reg_args = ["ctx.builder", `"${instruction_name}_reg"`, MODRM_RM].concat(shared_args);

          mem_body = [].concat(
              decode_address,
              gen_call(`::codegen::gen_modrm_resolve`, ["ctx", "addr"]),
              imm_bindings,
              gen_call(`::codegen::gen_modrm_fn${mem_args.length - 2}`, mem_args),
              mem_tail
          );
          reg_body = [].concat(
              imm_bindings,
              gen_call(`::codegen::gen_fn${reg_args.length - 2}_const`, reg_args),
              reg_tail
          );
      }

      return [].concat(
          before,
          {
              type: "if-else",
              if_blocks: [{
                  condition: "modrm_byte < 0xC0",
                  body: mem_body,
              }],
              else_block: {
                  body: reg_body,
              },
          },
          after
      );
  }
  /**
   * Generate the opcode dispatch tables and write the selected ones.
   *
   * Every entry of x86_table is bucketed by the low byte of its opcode, with
   * entries whose bits 8-15 equal 0F kept in a separate map. For each of the 256
   * opcode bytes a switch case is built; encodings flagged `os` get two cases
   * (opcode with size 16, opcode | 0x100 with size 32), the others one case
   * matching both values. Each table is wrapped in a Rust `pub fn jit(...)` and
   * passed to finalize_table_rust when its switch in `to_generate` is set.
   *
   * Asserts that every opcode byte has at least one encoding in both maps.
   */
  function gen_table()
  {
      const by_opcode = Object.create(null);
      const by_opcode0f = Object.create(null);

      for(const entry of x86_table)
      {
          const is_0f = (entry.opcode & 0xFF00) === 0x0F00;
          const bucket = is_0f ? by_opcode0f : by_opcode;
          const low_byte = entry.opcode & 0xFF;

          bucket[low_byte] = bucket[low_byte] || [];
          bucket[low_byte].push(entry);
      }

      // Build the switch node covering all 256 opcode bytes of one map.
      const build_switch = buckets =>
      {
          const cases = [];

          for(let opcode = 0; opcode < 0x100; opcode++)
          {
              const encodings = buckets[opcode];
              assert(encodings && encodings.length);

              const low_condition = `0x${hex(opcode, 2)}`;
              const high_condition = `0x${hex(opcode | 0x100, 2)}`;

              if(encodings[0].os)
              {
                  // operand-size dependent: separate bodies for 16 and 32 bit
                  cases.push({
                      conditions: [low_condition],
                      body: gen_instruction_body(encodings, 16),
                  });
                  cases.push({
                      conditions: [high_condition],
                      body: gen_instruction_body(encodings, 32),
                  });
              }
              else
              {
                  cases.push({
                      conditions: [low_condition, high_condition],
                      body: gen_instruction_body(encodings, undefined),
                  });
              }
          }

          return {
              type: "switch",
              condition: "opcode",
              cases,
              default_case: {
                  body: ["assert!(false);"]
              },
          };
      };

      // Wrap a switch node in the Rust function and hand the text to finalize_table_rust.
      const write_table = (file_name, switch_node) =>
      {
          const code = [
              "#[cfg_attr(rustfmt, rustfmt_skip)]",
              "pub fn jit(opcode: u32, ctx: &mut ::jit::JitContext, instr_flags: &mut u32) {",
              switch_node,
              "}",
          ];

          finalize_table_rust(
              OUT_DIR,
              file_name,
              rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
          );
      };

      // Both tables are always built (and thereby validated); only the
      // selected ones are written.
      const table = build_switch(by_opcode);

      if(to_generate.jit)
      {
          write_table("jit.rs", table);
      }

      const table0f = build_switch(by_opcode0f);

      if(to_generate.jit0f)
      {
          write_table("jit0f.rs", table0f);
      }
  }