generate_interpreter.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. #!/usr/bin/env node
  2. "use strict";
  3. const assert = require("assert").strict;
  4. const fs = require("fs");
  5. const path = require("path");
  6. const x86_table = require("./x86_table");
  7. const rust_ast = require("./rust_ast");
  8. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
  // Generated Rust sources go to "src/rust/gen/" inside the parent of this
  // script's directory; mkdirpSync (from ./util) is invoked on that path
  // before anything is generated.
  const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");
  mkdirpSync(OUT_DIR);

  // Command-line switches that choose which tables are written.
  const table_arg = get_switch_value("--table");
  const gen_all = get_switch_exist("--all");

  // A table is selected either by name through --table or implicitly by --all.
  const is_table_selected = name => gen_all || table_arg === name;
  const to_generate = {
      interpreter: is_table_selected("interpreter"),
      interpreter0f: is_table_selected("interpreter0f"),
  };

  // Refuse to run when the switches select no table at all.
  assert(
      Object.values(to_generate).some(Boolean),
      "Pass --table [interpreter|interpreter0f] or --all to pick which tables to generate"
  );

  // gen_table and everything it calls are function declarations further down
  // in the file, so they are already hoisted at this point.
  gen_table();
  /**
   * Builds the text of a Rust `match` expression around `imm`: the expression
   * evaluates to the payload of `Ok(o)` and makes the generated function
   * `return` on `Err(())`.
   *
   * @param {string} imm - Rust expression text to match on.
   * @returns {string} Rust source text of the `match` expression.
   */
  function wrap_imm_call(imm)
  {
      const on_ok = "Ok(o) => o";
      const on_err = "Err(()) => return";
      return `match ${imm} { ${on_ok}, ${on_err} }`;
  }
  /**
   * Picks the Rust immediate-read expression for an encoding, based on its
   * imm8 / imm8s / immaddr / imm16 / imm1632 / imm32 flags.
   *
   * @param {object} op - Encoding entry; reads `os`, `opcode` and the imm* flags.
   * @param {number|undefined} size_variant - Operand size (16 or 32) of the
   *     variant being generated; only consulted for `imm1632`.
   * @returns {string|undefined} Wrapped read expression, or `undefined` when
   *     the encoding has none of the immediate flags set.
   * @throws {AssertionError} When only `imm1632` is set and the effective
   *     size is neither 16 nor 32.
   */
  function gen_read_imm_call(op, size_variant)
  {
      // Byte-sized immediates take priority over every other flag.
      if(op.imm8)
      {
          return wrap_imm_call("read_imm8()");
      }
      if(op.imm8s)
      {
          return wrap_imm_call("read_imm8s()");
      }
      if(op.immaddr)
      {
          // immaddr: depends on address size
          return wrap_imm_call("read_moffs()");
      }
      if(!(op.imm16 || op.imm1632 || op.imm32))
      {
          return undefined;
      }

      // The size variant only applies to encodings flagged `os` or having an
      // odd opcode; everything else is treated as 8-bit.
      const size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;

      if(op.imm1632 && size === 16 || op.imm16)
      {
          return wrap_imm_call("read_imm16()");
      }

      assert(op.imm1632 && size === 32 || op.imm32);
      return wrap_imm_call("read_imm32s()");
  }
  /**
   * Renders a call statement `name(arg1, arg2, ...);` as text.
   *
   * @param {string} name - Name of the function to call.
   * @param {string[]} [args] - Argument expressions; any falsy value is
   *     treated as an empty argument list.
   * @returns {string} The call statement, terminated by a semicolon.
   */
  function gen_call(name, args)
  {
      const argument_list = (args || []).join(", ");
      return `${name}(${argument_list});`;
  }
  /*
   * Current naming scheme:
   * instr(16|32|)_(66|F2|F3)?0F?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
   *
   * This function produces everything up to and including the optional
   * fixed_g part; the _mem / _reg suffix is appended by the caller.
   */
  /**
   * Derives the name of the instruction handler for an encoding.
   *
   * @param {object} encoding - Encoding entry; reads `os`, `opcode`, `fixed_g`.
   * @param {number|undefined} size - Operand size, used as the name suffix
   *     only when `encoding.os` is set.
   * @returns {string} Handler name such as `instr32_660F10` or `instr_80_3`.
   * @throws {AssertionError} When bits 8-15 or bits 16-23 of the opcode hold a
   *     byte that is not one of the accepted prefix values.
   */
  function make_instruction_name(encoding, size)
  {
      const opcode = encoding.opcode;

      // A zero byte in a prefix position means "no prefix" and renders as "".
      const prefix_text = byte => byte === 0 ? "" : hex(byte, 2);

      const size_suffix = encoding.os ? String(size) : "";
      const opcode_hex = hex(opcode & 0xFF, 2);
      const first_prefix = prefix_text(opcode >> 8 & 0xFF);
      const second_prefix = prefix_text(opcode >> 16 & 0xFF);

      let fixed_g_suffix = "";
      if(encoding.fixed_g !== undefined)
      {
          fixed_g_suffix = `_${encoding.fixed_g}`;
      }

      assert(["", "0F", "F2", "F3"].includes(first_prefix));
      assert(["", "66", "F2", "F3"].includes(second_prefix));

      return `instr${size_suffix}_${second_prefix}${first_prefix}${opcode_hex}${fixed_g_suffix}`;
  }
  /**
   * Builds the body of one opcode case: an optional modrm fetch followed by
   * either the plain instruction body or an if/else dispatch on the 66 / F2 /
   * F3 prefixes when prefixed encodings exist for this opcode.
   *
   * @param {object[]} encodings - All encodings sharing this opcode byte; the
   *     first one supplies the `e` and `sse` flags.
   * @param {number|undefined} size - Operand size variant being generated.
   * @returns {Array} Syntax-tree nodes (strings and node objects).
   * @throws {AssertionError} When prefixed encodings are combined with a first
   *     encoding whose opcode layout does not allow them.
   */
  function gen_instruction_body(encodings, size)
  {
      const first = encodings[0];

      // Bucket every encoding by prefix: 0x66 is looked for in bits 16 and up,
      // F2 / F3 either in bits 8-15 or in bits 16 and up.
      const with_66 = [];
      const with_F2 = [];
      const with_F3 = [];
      const unprefixed = [];

      for(const candidate of encodings)
      {
          const upper_byte = candidate.opcode >>> 16;
          const middle_byte = candidate.opcode >>> 8 & 0xFF;

          let bucket = unprefixed;
          if(upper_byte === 0x66)
          {
              bucket = with_66;
          }
          else if(middle_byte === 0xF2 || upper_byte === 0xF2)
          {
              bucket = with_F2;
          }
          else if(middle_byte === 0xF3 || upper_byte === 0xF3)
          {
              bucket = with_F3;
          }
          bucket.push(candidate);
      }

      if(with_F2.length || with_F3.length)
      {
          assert((first.opcode & 0xFF0000) === 0 || (first.opcode & 0xFF00) === 0x0F00);
      }
      if(with_66.length)
      {
          assert((first.opcode & 0xFF00) === 0x0F00);
      }

      // The modrm byte is fetched once, ahead of any prefix dispatch.
      const modrm_fetch = first.e ?
          [`let modrm_byte = ${wrap_imm_call("read_imm8()")};`] : [];

      const prefixed_groups = [
          [with_66, "prefixes_ & PREFIX_66 != 0"],
          [with_F2, "prefixes_ & PREFIX_F2 != 0"],
          [with_F3, "prefixes_ & PREFIX_F3 != 0"],
      ].filter(([group]) => group.length > 0);

      if(prefixed_groups.length === 0)
      {
          return modrm_fetch.concat(gen_instruction_body_after_prefix(encodings, size));
      }

      const if_blocks = prefixed_groups.map(([group, condition]) => ({
          condition,
          body: gen_instruction_body_after_prefix(group, size),
      }));

      // In the fall-through branch the generated code asserts that none of
      // the checked prefixes is active.
      const check_prefixes = first.sse ?
          "(PREFIX_66 | PREFIX_F2 | PREFIX_F3)" : "(PREFIX_F2 | PREFIX_F3)";
      const else_block = {
          body: [`dbg_assert!((prefixes_ & ${check_prefixes}) == 0);`].concat(
              gen_instruction_body_after_prefix(unprefixed, size)
          ),
      };

      return ["let prefixes_ = *prefixes as i32;"].concat(
          modrm_fetch,
          {
              type: "if-else",
              if_blocks,
              else_block,
          }
      );
  }
  /**
   * Generates the body for a set of encodings that share opcode and prefix.
   * When the first encoding has a `fixed_g`, the result is a single switch on
   * bits 3-5 of the modrm byte with one case per `fixed_g` value; otherwise
   * there must be exactly one encoding and its body is generated directly.
   *
   * @param {object[]} encodings - Encodings in the same prefix bucket.
   * @param {number|undefined} size - Operand size variant being generated.
   * @returns {Array} Syntax-tree nodes.
   * @throws {AssertionError} On a fixed_g group without a modrm byte, a
   *     non-numeric fixed_g inside a group, or several encodings without
   *     fixed_g.
   */
  function gen_instruction_body_after_prefix(encodings, size)
  {
      const first = encodings[0];

      if(first.fixed_g === undefined)
      {
          assert(encodings.length === 1);
          return gen_instruction_body_after_fixed_g(encodings[0], size);
      }

      assert(first.e);

      // instruction with modrm byte where the middle 3 bits encode the instruction
      // group by opcode without prefix plus middle bits of modrm byte; when two
      // encodings map to the same key, the later one replaces the earlier one
      const by_key = Object.create(null);
      for(const candidate of encodings)
      {
          assert(typeof candidate.fixed_g === "number");
          by_key[candidate.opcode & 0xFFFF | candidate.fixed_g << 16] = candidate;
      }

      const ordered = Object.values(by_key).sort((left, right) => left.fixed_g - right.fixed_g);

      const group_cases = ordered.map(candidate => ({
          conditions: [candidate.fixed_g],
          body: gen_instruction_body_after_fixed_g(candidate, size),
      }));

      // Any modrm group value without an encoding panics in DEBUG builds and
      // otherwise calls trigger_ud() in the generated code.
      const default_case = {
          body: [
              `if DEBUG { panic!("Bad instruction at {:x}", *instruction_pointer); }`,
              "trigger_ud();",
          ],
      };

      return [
          {
              type: "switch",
              condition: "modrm_byte >> 3 & 7",
              cases: group_cases,
              default_case,
          },
      ];
  }
  /**
   * Generates the innermost body for a single, fully selected encoding: an
   * optional task-switch guard, the call to the instruction handler(s) and an
   * optional `after_block_boundary();` trailer.
   *
   * Three shapes are produced:
   *  - no modrm byte: one call taking the immediate(s) as arguments;
   *  - modrm byte with `ignore_mod`: one call taking both register fields;
   *  - modrm byte otherwise: an if/else on `modrm_byte < 0xC0` calling the
   *    `_mem` or the `_reg` handler.
   *
   * The immediate-read expression is computed exactly once and shared by all
   * branches.
   *
   * @param {object} encoding - Encoding entry to generate code for.
   * @param {number|undefined} size - Operand size variant being generated.
   * @returns {Array} Syntax-tree nodes (strings and node objects).
   * @throws {AssertionError} When `ignore_mod` is combined with an immediate,
   *     or `extra_imm16` / `extra_imm8` is set without a primary immediate.
   */
  function gen_instruction_body_after_fixed_g(encoding, size)
  {
      // The trailer is emitted for block boundaries (unless suppressed for the
      // interpreter) and for every non-custom encoding with a modrm byte.
      const needs_boundary_call =
          (encoding.block_boundary && !encoding.no_block_boundary_in_interpreted) ||
          (!encoding.custom && encoding.e);
      const instruction_postfix = needs_boundary_call ? ["after_block_boundary();"] : [];

      // Guard that makes the generated code return early when the
      // task-switch test function reports false.
      const instruction_prefix = [];
      if(encoding.task_switch_test || encoding.sse)
      {
          instruction_prefix.push({
              type: "if-else",
              if_blocks: [
                  {
                      condition: encoding.sse ? "!task_switch_test_mmx()" : "!task_switch_test()",
                      body: ["return;"],
                  }
              ],
          });
      }

      const imm_read = gen_read_imm_call(encoding, size);
      const instruction_name = make_instruction_name(encoding, size);

      if(!encoding.e)
      {
          // instruction without modrm byte: immediates are the only arguments
          const args = [];
          if(imm_read)
          {
              args.push(imm_read);
          }
          if(encoding.extra_imm16)
          {
              assert(imm_read);
              args.push(wrap_imm_call("read_imm16()"));
          }
          else if(encoding.extra_imm8)
          {
              assert(imm_read);
              args.push(wrap_imm_call("read_imm8()"));
          }

          return [].concat(
              instruction_prefix,
              gen_call(instruction_name, args),
              instruction_postfix
          );
      }

      // instruction with modrm byte
      if(encoding.ignore_mod)
      {
          assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

          // Has modrm byte, but the 2 mod bits are ignored and both
          // operands are always registers (0f20-0f24)
          return [].concat(
              instruction_prefix,
              gen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
              instruction_postfix
          );
      }

      // With custom_modrm_resolve the handler receives the raw modrm byte
      // (requires special handling around modrm_resolve); otherwise the
      // address is resolved here.
      const mem_args = [
          encoding.custom_modrm_resolve ?
              "modrm_byte" :
              "match modrm_resolve(modrm_byte) { Ok(a) => a, Err(()) => return }",
      ];
      const reg_args = ["modrm_byte & 7"];

      if(encoding.fixed_g === undefined)
      {
          // Without a fixed group the middle modrm bits are a real operand.
          mem_args.push("modrm_byte >> 3 & 7");
          reg_args.push("modrm_byte >> 3 & 7");
      }
      if(imm_read)
      {
          mem_args.push(imm_read);
          reg_args.push(imm_read);
      }

      return [].concat(
          instruction_prefix,
          {
              type: "if-else",
              if_blocks: [
                  {
                      condition: "modrm_byte < 0xC0",
                      body: [gen_call(`${instruction_name}_mem`, mem_args)],
                  }
              ],
              else_block: {
                  body: [gen_call(`${instruction_name}_reg`, reg_args)],
              },
          },
          instruction_postfix
      );
  }
  /**
   * Entry point: groups every entry of x86_table by its low opcode byte into
   * the one-byte table and the 0F table, builds a switch over `opcode` for
   * each, and writes the selected ones to OUT_DIR as "interpreter.rs" and
   * "interpreter0f.rs".
   *
   * Both tables are always built, even when only one is written, so the
   * completeness assertions run for both regardless of the command-line
   * selection. The one-byte table is built and written before the 0F table
   * is built.
   *
   * @throws {AssertionError} When one of the 256 opcode bytes of either table
   *     has no encoding.
   */
  function gen_table()
  {
      const by_opcode = Object.create(null);
      const by_opcode0f = Object.create(null);

      for(const entry of x86_table)
      {
          // Entries whose bits 8-15 are 0F belong to the two-byte opcode table.
          const target = (entry.opcode & 0xFF00) === 0x0F00 ? by_opcode0f : by_opcode;
          const low_byte = entry.opcode & 0xFF;
          target[low_byte] = target[low_byte] || [];
          target[low_byte].push(entry);
      }

      const table = build_opcode_switch(by_opcode);
      if(to_generate.interpreter)
      {
          write_interpreter_file("interpreter.rs", "instructions", table);
      }

      const table0f = build_opcode_switch(by_opcode0f);
      if(to_generate.interpreter0f)
      {
          write_interpreter_file("interpreter0f.rs", "instructions_0f", table0f);
      }
  }

  /**
   * Builds the switch node over `opcode` for one table. Case labels are the
   * opcode byte and the same byte with bit 8 set: encodings flagged `os` get
   * a separate case for each label (generated with size 16 and 32), all
   * others get one case carrying both labels.
   *
   * @param {object} encodings_by_opcode - Map from opcode byte (0..255) to
   *     the non-empty list of encodings for that byte.
   * @returns {object} Switch node with 256 to 512 cases and a default case.
   * @throws {AssertionError} When an opcode byte has no encodings.
   */
  function build_opcode_switch(encodings_by_opcode)
  {
      const cases = [];

      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          const encodings = encodings_by_opcode[opcode];
          assert(encodings && encodings.length);

          const low_label = `0x${hex(opcode, 2)}`;
          const high_label = `0x${hex(opcode | 0x100, 2)}`;

          if(encodings[0].os)
          {
              cases.push({
                  conditions: [low_label],
                  body: gen_instruction_body(encodings, 16),
              });
              cases.push({
                  conditions: [high_label],
                  body: gen_instruction_body(encodings, 32),
              });
          }
          else
          {
              cases.push({
                  conditions: [low_label, high_label],
                  body: gen_instruction_body(encodings, undefined),
              });
          }
      }

      return {
          type: "switch",
          condition: "opcode",
          cases,
          default_case: {
              body: ["assert!(false);"]
          },
      };
  }

  /**
   * Wraps a switch node in the Rust file skeleton, prints it through rust_ast
   * and hands the text to finalize_table_rust.
   *
   * @param {string} file_name - Output file name inside OUT_DIR.
   * @param {string} instructions_module - Name of the `cpu2` module whose
   *     items are glob-imported for the instruction handlers.
   * @param {object} table - Switch node forming the body of `run`.
   */
  function write_interpreter_file(file_name, instructions_module, table)
  {
      const code = [
          "#![cfg_attr(rustfmt, rustfmt_skip)]",
          "use cpu2::cpu::*;",
          `use cpu2::${instructions_module}::*;`,
          "use cpu2::global_pointers::*;",
          "pub unsafe fn run(opcode: u32) {",
          table,
          "}",
      ];

      finalize_table_rust(
          OUT_DIR,
          file_name,
          rust_ast.print_syntax_tree(code).join("\n") + "\n"
      );
  }