generate_interpreter.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. #!/usr/bin/env node
  2. "use strict";
  3. const assert = require("assert").strict;
  4. const fs = require("fs");
  5. const path = require("path");
  6. const x86_table = require("./x86_table");
  7. const rust_ast = require("./rust_ast");
  8. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
  9. const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");
  10. mkdirpSync(OUT_DIR);
  11. const table_arg = get_switch_value("--table");
  12. const gen_all = get_switch_exist("--all");
  13. const to_generate = {
  14. interpreter: gen_all || table_arg === "interpreter",
  15. interpreter0f: gen_all || table_arg === "interpreter0f",
  16. };
  17. assert(
  18. Object.keys(to_generate).some(k => to_generate[k]),
  19. "Pass --table [interpreter|interpreter0f] or --all to pick which tables to generate"
  20. );
  21. gen_table();
  22. function wrap_imm_call(imm)
  23. {
  24. return `match ${imm} { Ok(o) => o, Err(()) => return }`;
  25. }
  26. function gen_read_imm_call(op, size_variant)
  27. {
  28. let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
  29. if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
  30. {
  31. if(op.imm8)
  32. {
  33. return wrap_imm_call("read_imm8()");
  34. }
  35. else if(op.imm8s)
  36. {
  37. return wrap_imm_call("read_imm8s()");
  38. }
  39. else
  40. {
  41. if(op.immaddr)
  42. {
  43. // immaddr: depends on address size
  44. return wrap_imm_call("read_moffs()");
  45. }
  46. else
  47. {
  48. assert(op.imm1632 || op.imm16 || op.imm32);
  49. if(op.imm1632 && size === 16 || op.imm16)
  50. {
  51. return wrap_imm_call("read_imm16()");
  52. }
  53. else
  54. {
  55. assert(op.imm1632 && size === 32 || op.imm32);
  56. return wrap_imm_call("read_imm32s()");
  57. }
  58. }
  59. }
  60. }
  61. else
  62. {
  63. return undefined;
  64. }
  65. }
  66. function gen_call(name, args)
  67. {
  68. args = args || [];
  69. return `${name}(${args.join(", ")});`;
  70. }
  71. /*
  72. * Current naming scheme:
  73. * instr(16|32|)_(66|F2|F3)?0F?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
  74. */
  75. function make_instruction_name(encoding, size)
  76. {
  77. const suffix = encoding.os ? String(size) : "";
  78. const opcode_hex = hex(encoding.opcode & 0xFF, 2);
  79. const first_prefix = (encoding.opcode & 0xFF00) === 0 ? "" : hex(encoding.opcode >> 8 & 0xFF, 2);
  80. const second_prefix = (encoding.opcode & 0xFF0000) === 0 ? "" : hex(encoding.opcode >> 16 & 0xFF, 2);
  81. const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
  82. const module = first_prefix === "0F" || second_prefix === "0F" ? "instructions_0f" : "instructions";
  83. assert(first_prefix === "" || first_prefix === "0F" || first_prefix === "F2" || first_prefix === "F3");
  84. assert(second_prefix === "" || second_prefix === "66" || second_prefix === "F2" || second_prefix === "F3");
  85. return `${module}::instr${suffix}_${second_prefix}${first_prefix}${opcode_hex}${fixed_g_suffix}`;
  86. }
  87. function gen_instruction_body(encodings, size)
  88. {
  89. const encoding = encodings[0];
  90. let has_66 = [];
  91. let has_F2 = [];
  92. let has_F3 = [];
  93. let no_prefix = [];
  94. for(let e of encodings)
  95. {
  96. if((e.opcode >>> 16) === 0x66) has_66.push(e);
  97. else if((e.opcode >>> 8 & 0xFF) === 0xF2 || (e.opcode >>> 16) === 0xF2) has_F2.push(e);
  98. else if((e.opcode >>> 8 & 0xFF) === 0xF3 || (e.opcode >>> 16) === 0xF3) has_F3.push(e);
  99. else no_prefix.push(e);
  100. }
  101. if(has_F2.length || has_F3.length)
  102. {
  103. assert((encoding.opcode & 0xFF0000) === 0 || (encoding.opcode & 0xFF00) === 0x0F00);
  104. }
  105. if(has_66.length)
  106. {
  107. assert((encoding.opcode & 0xFF00) === 0x0F00);
  108. }
  109. const code = [];
  110. if(encoding.e)
  111. {
  112. code.push(`let modrm_byte = ${wrap_imm_call("read_imm8()")};`);
  113. }
  114. if(has_66.length || has_F2.length || has_F3.length)
  115. {
  116. const if_blocks = [];
  117. if(has_66.length) {
  118. const body = gen_instruction_body_after_prefix(has_66, size);
  119. if_blocks.push({ condition: "prefixes_ & PREFIX_66 != 0", body, });
  120. }
  121. if(has_F2.length) {
  122. const body = gen_instruction_body_after_prefix(has_F2, size);
  123. if_blocks.push({ condition: "prefixes_ & PREFIX_F2 != 0", body, });
  124. }
  125. if(has_F3.length) {
  126. const body = gen_instruction_body_after_prefix(has_F3, size);
  127. if_blocks.push({ condition: "prefixes_ & PREFIX_F3 != 0", body, });
  128. }
  129. const check_prefixes = encoding.sse ? "(PREFIX_66 | PREFIX_F2 | PREFIX_F3)" : "(PREFIX_F2 | PREFIX_F3)";
  130. const else_block = {
  131. body: [].concat(
  132. "dbg_assert!((prefixes_ & " + check_prefixes + ") == 0);",
  133. gen_instruction_body_after_prefix(no_prefix, size)
  134. )
  135. };
  136. return [].concat(
  137. "let prefixes_ = *prefixes as i32;",
  138. code,
  139. {
  140. type: "if-else",
  141. if_blocks,
  142. else_block,
  143. }
  144. );
  145. }
  146. else {
  147. return [].concat(
  148. code,
  149. gen_instruction_body_after_prefix(encodings, size)
  150. );
  151. }
  152. }
  153. function gen_instruction_body_after_prefix(encodings, size)
  154. {
  155. const encoding = encodings[0];
  156. if(encoding.fixed_g !== undefined)
  157. {
  158. assert(encoding.e);
  159. // instruction with modrm byte where the middle 3 bits encode the instruction
  160. // group by opcode without prefix plus middle bits of modrm byte
  161. let cases = encodings.reduce((cases_by_opcode, case_) => {
  162. assert(typeof case_.fixed_g === "number");
  163. cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
  164. return cases_by_opcode;
  165. }, Object.create(null));
  166. cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
  167. return [
  168. {
  169. type: "switch",
  170. condition: "modrm_byte >> 3 & 7",
  171. cases: cases.map(case_ => {
  172. const fixed_g = case_.fixed_g;
  173. const body = gen_instruction_body_after_fixed_g(case_, size);
  174. return {
  175. conditions: [fixed_g],
  176. body,
  177. };
  178. }),
  179. default_case: {
  180. body: [
  181. `if DEBUG { panic!("Bad instruction at {:x}", *instruction_pointer); }`,
  182. "trigger_ud();",
  183. ],
  184. }
  185. },
  186. ];
  187. }
  188. else {
  189. assert(encodings.length === 1);
  190. return gen_instruction_body_after_fixed_g(encodings[0], size);
  191. }
  192. }
  193. function gen_instruction_body_after_fixed_g(encoding, size)
  194. {
  195. const instruction_prefix = [];
  196. const instruction_postfix =
  197. (encoding.block_boundary && !encoding.no_block_boundary_in_interpreted) ||
  198. (!encoding.custom && encoding.e) ?
  199. ["after_block_boundary();"] : [];
  200. if(encoding.task_switch_test || encoding.sse)
  201. {
  202. instruction_prefix.push(
  203. {
  204. type: "if-else",
  205. if_blocks: [
  206. {
  207. condition: encoding.sse ? "!task_switch_test_mmx()" : "!task_switch_test()",
  208. body: ["return;"],
  209. }
  210. ],
  211. });
  212. }
  213. const imm_read = gen_read_imm_call(encoding, size);
  214. const instruction_name = make_instruction_name(encoding, size);
  215. if(encoding.e)
  216. {
  217. // instruction with modrm byte
  218. const imm_read = gen_read_imm_call(encoding, size);
  219. if(encoding.ignore_mod)
  220. {
  221. assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
  222. // Has modrm byte, but the 2 mod bits are ignored and both
  223. // operands are always registers (0f20-0f24)
  224. return [].concat(
  225. instruction_prefix,
  226. gen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
  227. instruction_postfix
  228. );
  229. }
  230. else
  231. {
  232. let mem_args;
  233. if(encoding.custom_modrm_resolve)
  234. {
  235. // requires special handling around modrm_resolve
  236. mem_args = ["modrm_byte"];
  237. }
  238. else
  239. {
  240. mem_args = ["match modrm_resolve(modrm_byte) { Ok(a) => a, Err(()) => return }"];
  241. }
  242. const reg_args = ["modrm_byte & 7"];
  243. if(encoding.fixed_g === undefined)
  244. {
  245. mem_args.push("modrm_byte >> 3 & 7");
  246. reg_args.push("modrm_byte >> 3 & 7");
  247. }
  248. if(imm_read)
  249. {
  250. mem_args.push(imm_read);
  251. reg_args.push(imm_read);
  252. }
  253. return [].concat(
  254. instruction_prefix,
  255. {
  256. type: "if-else",
  257. if_blocks: [
  258. {
  259. condition: "modrm_byte < 0xC0",
  260. body: [].concat(
  261. gen_call(`${instruction_name}_mem`, mem_args)
  262. ),
  263. }
  264. ],
  265. else_block: {
  266. body: [gen_call(`${instruction_name}_reg`, reg_args)],
  267. },
  268. },
  269. instruction_postfix
  270. );
  271. }
  272. }
  273. else
  274. {
  275. const args = [];
  276. if(imm_read)
  277. {
  278. args.push(imm_read);
  279. }
  280. if(encoding.extra_imm16)
  281. {
  282. assert(imm_read);
  283. args.push(wrap_imm_call("read_imm16()"));
  284. }
  285. else if(encoding.extra_imm8)
  286. {
  287. assert(imm_read);
  288. args.push(wrap_imm_call("read_imm8()"));
  289. }
  290. return [].concat(
  291. instruction_prefix,
  292. gen_call(instruction_name, args),
  293. instruction_postfix
  294. );
  295. }
  296. }
  297. function gen_table()
  298. {
  299. let by_opcode = Object.create(null);
  300. let by_opcode0f = Object.create(null);
  301. for(let o of x86_table)
  302. {
  303. let opcode = o.opcode;
  304. if((opcode & 0xFF00) === 0x0F00)
  305. {
  306. opcode &= 0xFF;
  307. by_opcode0f[opcode] = by_opcode0f[opcode] || [];
  308. by_opcode0f[opcode].push(o);
  309. }
  310. else
  311. {
  312. opcode &= 0xFF;
  313. by_opcode[opcode] = by_opcode[opcode] || [];
  314. by_opcode[opcode].push(o);
  315. }
  316. }
  317. let cases = [];
  318. for(let opcode = 0; opcode < 0x100; opcode++)
  319. {
  320. let encoding = by_opcode[opcode];
  321. assert(encoding && encoding.length);
  322. let opcode_hex = hex(opcode, 2);
  323. let opcode_high_hex = hex(opcode | 0x100, 2);
  324. if(encoding[0].os)
  325. {
  326. cases.push({
  327. conditions: [`0x${opcode_hex}`],
  328. body: gen_instruction_body(encoding, 16),
  329. });
  330. cases.push({
  331. conditions: [`0x${opcode_high_hex}`],
  332. body: gen_instruction_body(encoding, 32),
  333. });
  334. }
  335. else
  336. {
  337. cases.push({
  338. conditions: [`0x${opcode_hex}`, `0x${opcode_high_hex}`],
  339. body: gen_instruction_body(encoding, undefined),
  340. });
  341. }
  342. }
  343. const table = {
  344. type: "switch",
  345. condition: "opcode",
  346. cases,
  347. default_case: {
  348. body: ["assert!(false);"]
  349. },
  350. };
  351. if(to_generate.interpreter)
  352. {
  353. const code = [
  354. "#![cfg_attr(rustfmt, rustfmt_skip)]",
  355. "use cpu::cpu::{after_block_boundary, modrm_resolve};",
  356. "use cpu::cpu::{read_imm8, read_imm8s, read_imm16, read_imm32s, read_moffs};",
  357. "use cpu::cpu::{task_switch_test, trigger_ud, DEBUG, PREFIX_F2, PREFIX_F3};",
  358. "use cpu::instructions;",
  359. "use cpu::global_pointers::{instruction_pointer, prefixes};",
  360. "pub unsafe fn run(opcode: u32) {",
  361. table,
  362. "}",
  363. ];
  364. finalize_table_rust(
  365. OUT_DIR,
  366. "interpreter.rs",
  367. rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
  368. );
  369. }
  370. const cases0f = [];
  371. for(let opcode = 0; opcode < 0x100; opcode++)
  372. {
  373. let encoding = by_opcode0f[opcode];
  374. assert(encoding && encoding.length);
  375. let opcode_hex = hex(opcode, 2);
  376. let opcode_high_hex = hex(opcode | 0x100, 2);
  377. if(encoding[0].os)
  378. {
  379. cases0f.push({
  380. conditions: [`0x${opcode_hex}`],
  381. body: gen_instruction_body(encoding, 16),
  382. });
  383. cases0f.push({
  384. conditions: [`0x${opcode_high_hex}`],
  385. body: gen_instruction_body(encoding, 32),
  386. });
  387. }
  388. else
  389. {
  390. let block = {
  391. conditions: [`0x${opcode_hex}`, `0x${opcode_high_hex}`],
  392. body: gen_instruction_body(encoding, undefined),
  393. };
  394. cases0f.push(block);
  395. }
  396. }
  397. const table0f = {
  398. type: "switch",
  399. condition: "opcode",
  400. cases: cases0f,
  401. default_case: {
  402. body: ["assert!(false);"]
  403. },
  404. };
  405. if(to_generate.interpreter0f)
  406. {
  407. const code = [
  408. "#![cfg_attr(rustfmt, rustfmt_skip)]",
  409. "use cpu::cpu::{after_block_boundary, modrm_resolve};",
  410. "use cpu::cpu::{read_imm8, read_imm16, read_imm32s};",
  411. "use cpu::cpu::{task_switch_test, task_switch_test_mmx, trigger_ud};",
  412. "use cpu::cpu::{DEBUG, PREFIX_66, PREFIX_F2, PREFIX_F3};",
  413. "use cpu::instructions_0f;",
  414. "use cpu::global_pointers::{instruction_pointer, prefixes};",
  415. "pub unsafe fn run(opcode: u32) {",
  416. table0f,
  417. "}",
  418. ];
  419. finalize_table_rust(
  420. OUT_DIR,
  421. "interpreter0f.rs",
  422. rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
  423. );
  424. }
  425. }