generate_interpreter.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. #!/usr/bin/env node
  2. "use strict";
  3. const fs = require("fs");
  4. const path = require("path");
  5. const encodings = require("./x86_table");
  6. const c_ast = require("./c_ast");
  7. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
  // Directory the generated tables are written to: --output-dir when given,
  // otherwise the "build" directory next to this script's parent directory
  const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
  mkdirpSync(OUT_DIR);

  const table_arg = get_switch_value("--table");
  const gen_all = get_switch_exist("--all");

  // A table is generated when --all is present or when it is named by --table
  const is_selected = table_name => gen_all || table_arg === table_name;

  const to_generate = {
      interpreter: is_selected("interpreter"),
      interpreter0f_16: is_selected("interpreter0f_16"),
      interpreter0f_32: is_selected("interpreter0f_32"),
  };

  // Note: console.assert only reports the message, it does not stop the script
  console.assert(
      Object.values(to_generate).some(Boolean),
      "Pass --table [interpreter|interpreter0f_16|interpreter0f_32] or --all to pick which tables to generate"
  );

  gen_table();
  /**
   * Pick the C expression that reads the immediate operand of an encoding.
   *
   * The flags are checked in this order: imm8, imm8s, immaddr, then the
   * 16/32-bit immediates. For imm1632 the width is `size_variant` when the
   * encoding has `os` set or an odd opcode, and 8 otherwise.
   *
   * @param {Object} op  Encoding entry (reads opcode, os and the imm* flags)
   * @param {number|undefined} size_variant  Operand size of the variant being generated
   * @returns {string|undefined} Source text of the read call, or undefined
   *     when the encoding carries no immediate
   */
  function gen_read_imm_call(op, size_variant)
  {
      const has_immediate = op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr;

      if(!has_immediate)
      {
          return undefined;
      }

      if(op.imm8)
      {
          return "read_imm8()";
      }

      if(op.imm8s)
      {
          return "read_imm8s()";
      }

      if(op.immaddr)
      {
          // immaddr: depends on address size
          return "read_moffs()";
      }

      console.assert(op.imm1632 || op.imm16 || op.imm32);

      const size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
      const reads_16_bits = (op.imm1632 && size === 16) || op.imm16;

      if(reads_16_bits)
      {
          return "read_imm16()";
      }

      // Everything left must be a 32-bit immediate; anything else is reported
      // by the assertion but still falls through to the 32-bit read
      console.assert((op.imm1632 && size === 32) || op.imm32);
      return "read_imm32s()";
  }
  /**
   * Render a C call statement such as `name(a, b);`.
   *
   * @param {string} name  Function to call
   * @param {Array<string>} [args]  Argument expressions; any falsy value
   *     is treated as an empty argument list
   * @returns {string} The call followed by a semicolon
   */
  function gen_call(name, args)
  {
      const argument_list = (args || []).join(", ");
      return `${name}(${argument_list});`;
  }
  /**
   * Build an if-else node that calls `<name>_mem` when the modrm byte is
   * below 0xC0 and `<name>_reg` otherwise.
   *
   * @param {string} name  Instruction name without the _mem/_reg suffix
   * @param {string|undefined} modrm_resolve_prefix  Optional statement placed
   *     before the _mem call
   * @param {Array<string>} mem_args  Arguments for the _mem call
   * @param {Array<string>} reg_args  Arguments for the _reg call
   * @returns {Object} Node of type "if-else"
   */
  function gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)
  {
      const mem_body = [];

      if(modrm_resolve_prefix)
      {
          mem_body.push(modrm_resolve_prefix);
      }
      mem_body.push(gen_call(`${name}_mem`, mem_args));

      const reg_body = [gen_call(`${name}_reg`, reg_args)];

      return {
          type: "if-else",
          if_blocks: [
              {
                  condition: "modrm_byte < 0xC0",
                  body: mem_body,
              },
          ],
          else_block: {
              body: reg_body,
          },
      };
  }
  /**
   * Build the name of the C function that implements an encoding.
   *
   * Current naming scheme:
   * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
   *
   * This function produces everything up to and including the optional
   * `_[0-7]` group suffix; the `_mem`/`_reg` suffix is appended by callers.
   *
   * @param {Object} encoding  Encoding entry (reads os, opcode and fixed_g)
   * @param {number|undefined} size  Operand size, only used when encoding.os is set
   * @param {number} [prefix_variant]  Prefix byte (0x66, 0xF2 or 0xF3) of the
   *     variant, or undefined for the unprefixed instruction
   * @returns {string} The instruction name
   */
  function make_instruction_name(encoding, size, prefix_variant)
  {
      let size_part = "";
      if(encoding.os)
      {
          size_part = String(size);
      }

      const opcode_part = hex(encoding.opcode & 0xFF, 2);

      // Two-byte opcodes carry the 0F escape byte in bits 8-15
      const has_0f_escape = (encoding.opcode & 0xFF00) === 0x0F00;
      const escape_part = has_0f_escape ? "0F" : "";

      let prefix_part = "";
      if(prefix_variant !== undefined)
      {
          prefix_part = hex(prefix_variant, 2);
      }

      let group_part = "";
      if(encoding.fixed_g !== undefined)
      {
          group_part = `_${encoding.fixed_g}`;
      }

      return `instr${size_part}_${prefix_part}${escape_part}${opcode_part}${group_part}`;
  }
  /**
   * Generate the statements for one opcode's case in an interpreter switch.
   *
   * Four shapes of instruction are handled, chosen from the first encoding:
   *   1. fixed_g set: a modrm byte whose middle 3 bits select the instruction
   *   2. 66/F2/F3 prefixed variants exist: a modrm byte whose middle 3 bits
   *      encode a register, dispatched on the active prefix
   *   3. `e` set: a modrm byte whose middle 3 bits encode a register
   *   4. otherwise: no modrm byte, only optional immediates
   *
   * @param {Array<Object>} encodings  All table entries for this opcode
   *     (including prefixed and fixed_g variants); must not be empty
   * @param {number|undefined} size  Operand size of the variant being generated
   * @returns {Array<string|Object>} Statement strings and "switch"/"if-else"
   *     nodes making up the case body
   */
  function gen_instruction_body(encodings, size)
  {
      const encoding = encodings[0];

      // A prefixed variant stores its prefix byte in bits 16 and up of the
      // opcode. Keep the variants that exist for this opcode, in the fixed
      // order 66, F2, F3 in which they are tested by the generated code
      const prefix_variants = [
          { prefix: 0x66, condition: "prefixes_ & PREFIX_66" },
          { prefix: 0xF2, condition: "prefixes_ & PREFIX_F2" },
          { prefix: 0xF3, condition: "prefixes_ & PREFIX_F3" },
      ].filter(variant => encodings.some(e => (e.opcode >>> 16) === variant.prefix));
      const has_prefix_variants = prefix_variants.length > 0;

      if(has_prefix_variants)
      {
          console.assert((encoding.opcode & 0xFF00) === 0x0F00);
      }

      const instruction_postfix = encoding.block_boundary ? ["after_block_boundary();"] : [];

      if(encoding.fixed_g !== undefined)
      {
          // instruction with modrm byte where the middle 3 bits encode the instruction

          // group by opcode without prefix plus middle bits of modrm byte;
          // when several encodings share a key the last one wins
          const cases_by_opcode = Object.create(null);
          for(const case_ of encodings)
          {
              console.assert(typeof case_.fixed_g === "number");
              cases_by_opcode[(case_.opcode & 0xFFFF) | (case_.fixed_g << 16)] = case_;
          }
          const cases = Object.values(cases_by_opcode).sort((e1, e2) => e1.fixed_g - e2.fixed_g);

          return [
              "int32_t modrm_byte = read_imm8();",
              {
                  type: "switch",
                  condition: "modrm_byte >> 3 & 7",
                  cases: cases.map(case_ => {
                      const instruction_name = make_instruction_name(case_, size);
                      const case_postfix = case_.block_boundary ? ["after_block_boundary();"] : [];
                      const modrm_resolve_prefix = case_.requires_prefix_call ?
                          gen_call(instruction_name + "_mem_pre") : undefined;

                      // The middle bits select the instruction, so they are
                      // not passed on as a register operand
                      const { mem_args, reg_args } = gen_modrm_args(gen_read_imm_call(case_, size), false);

                      if(!has_prefix_variants)
                      {
                          return {
                              conditions: [case_.fixed_g],
                              body: [
                                  gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args),
                              ].concat(case_postfix),
                          };
                      }

                      return {
                          conditions: [case_.fixed_g],
                          body: [
                              "int32_t prefixes_ = *prefixes;",
                              gen_prefix_dispatch(case_, size, prefix_variants, modrm_resolve_prefix, mem_args, reg_args),
                          ].concat(case_postfix),
                      };
                  }),
                  default_case: {
                      body: [
                          "assert(false);",
                          "trigger_ud();",
                      ],
                  },
              },
          ].concat(instruction_postfix);
      }

      if(has_prefix_variants)
      {
          // instruction with modrm byte (middle 3 bits encode a register)
          // that has 66/F2/F3 prefixed variants
          console.assert(encoding.e);
          console.assert(!encoding.ignore_mod);
          console.assert(!encoding.requires_prefix_call, "Unexpected instruction (66/f2/f3 with prefix call)");

          const { mem_args, reg_args } = gen_modrm_args(gen_read_imm_call(encoding, size), true);

          return [
              "int32_t modrm_byte = read_imm8();",
              "int32_t prefixes_ = *prefixes;",
              gen_prefix_dispatch(encoding, size, prefix_variants, undefined, mem_args, reg_args),
          ].concat(instruction_postfix);
      }

      if(encoding.e)
      {
          // instruction with modrm byte where the middle 3 bits encode a register
          console.assert(encodings.length === 1);

          const instruction_name = make_instruction_name(encoding, size);
          const modrm_resolve_prefix = encoding.requires_prefix_call ?
              gen_call(instruction_name + "_mem_pre") : undefined;
          const imm_read = gen_read_imm_call(encoding, size);

          if(encoding.ignore_mod)
          {
              console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
              console.assert(!modrm_resolve_prefix, "Unexpected instruction (ignore mod with prefix call)");

              // Has modrm byte, but the 2 mod bits are ignored and both
              // operands are always registers (0f20-0f24)
              return [
                  "int32_t modrm_byte = read_imm8();",
                  gen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
              ].concat(instruction_postfix);
          }

          const { mem_args, reg_args } = gen_modrm_args(imm_read, true);

          return [
              "int32_t modrm_byte = read_imm8();",
              gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args),
          ].concat(instruction_postfix);
      }

      // instruction without modrm byte or prefix
      const imm_read = gen_read_imm_call(encoding, size);
      const instruction_name = make_instruction_name(encoding, size);
      const args = [];

      if(imm_read)
      {
          args.push(imm_read);
      }

      // An extra immediate is only expected after a regular one
      if(encoding.extra_imm16)
      {
          console.assert(imm_read);
          args.push("read_imm16()");
      }
      else if(encoding.extra_imm8)
      {
          console.assert(imm_read);
          args.push("read_imm8()");
      }

      return [gen_call(instruction_name, args)].concat(instruction_postfix);
  }

  /**
   * Helper of gen_instruction_body: build the argument lists of the _mem and
   * _reg variants of a modrm instruction.
   *
   * @param {string|undefined} imm_read  Immediate read expression appended
   *     last to both lists, if any
   * @param {boolean} has_reg_operand  Whether the middle 3 bits of the modrm
   *     byte are passed as a register operand
   * @returns {{mem_args: Array<string>, reg_args: Array<string>}}
   */
  function gen_modrm_args(imm_read, has_reg_operand)
  {
      const mem_args = ["modrm_resolve(modrm_byte)"];
      const reg_args = ["modrm_byte & 7"];

      if(has_reg_operand)
      {
          mem_args.push("modrm_byte >> 3 & 7");
          reg_args.push("modrm_byte >> 3 & 7");
      }

      if(imm_read)
      {
          mem_args.push(imm_read);
          reg_args.push(imm_read);
      }

      return { mem_args, reg_args };
  }

  /**
   * Helper of gen_instruction_body: build the if-else node that picks the
   * prefixed variant of an instruction matching the active prefix, falling
   * back to the unprefixed instruction.
   *
   * @param {Object} encoding  Encoding used to derive the instruction names
   * @param {number|undefined} size  Operand size of the variant being generated
   * @param {Array<{prefix: number, condition: string}>} prefix_variants
   *     Prefixes to test, in order, with the C condition selecting each
   * @param {string|undefined} modrm_resolve_prefix  Optional statement placed
   *     before every _mem call
   * @param {Array<string>} mem_args  Arguments for the _mem calls
   * @param {Array<string>} reg_args  Arguments for the _reg calls
   * @returns {Object} Node of type "if-else"
   */
  function gen_prefix_dispatch(encoding, size, prefix_variants, modrm_resolve_prefix, mem_args, reg_args)
  {
      const if_blocks = prefix_variants.map(variant => {
          const name = make_instruction_name(encoding, size, variant.prefix);
          return {
              condition: variant.condition,
              body: [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)],
          };
      });

      const else_block = {
          body: [
              gen_modrm_mem_reg_split(make_instruction_name(encoding, size), modrm_resolve_prefix, mem_args, reg_args),
          ],
      };

      return {
          type: "if-else",
          if_blocks,
          else_block,
      };
  }
  /**
   * Build the switch tables for the one-byte opcodes and for the 0F-prefixed
   * opcodes (one table per operand size) and write out the selected ones.
   *
   * All three tables are always built; `to_generate` only decides which of
   * them are printed and passed to finalize_table.
   */
  function gen_table()
  {
      const by_opcode = Object.create(null);
      const by_opcode0f = Object.create(null);

      // Bucket the encodings by their low opcode byte. Opcodes of 0x100 and
      // above are only kept when bits 8-15 hold the 0F escape byte
      for(const entry of encodings)
      {
          const is_multi_byte = entry.opcode >= 0x100;

          if(!is_multi_byte)
          {
              by_opcode[entry.opcode] = by_opcode[entry.opcode] || [];
              by_opcode[entry.opcode].push(entry);
              continue;
          }

          if((entry.opcode & 0xFF00) === 0x0F00)
          {
              const low_byte = entry.opcode & 0xFF;
              by_opcode0f[low_byte] = by_opcode0f[low_byte] || [];
              by_opcode0f[low_byte].push(entry);
          }
      }

      // Wrap a list of cases in the switch over `opcode`
      const make_opcode_switch = switch_cases => ({
          type: "switch",
          condition: "opcode",
          cases: switch_cases,
          default_case: {
              body: ["assert(false);"]
          },
      });

      // Print a table and hand it to finalize_table under the given name
      const write_table = (name, table) => {
          finalize_table(
              OUT_DIR,
              name,
              c_ast.print_syntax_tree([table]).join("\n") + "\n"
          );
      };

      // One-byte opcodes: the 32-bit variant of an opcode is case `opcode|0x100`
      const cases = [];
      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          const entries = by_opcode[opcode];
          console.assert(entries && entries.length);

          const opcode_hex = hex(opcode, 2);
          const case_16 = `0x${opcode_hex}`;
          const case_32 = `0x${opcode_hex}|0x100`;

          if(entries[0].os)
          {
              cases.push({
                  conditions: [case_16],
                  body: gen_instruction_body(entries, 16),
              });
              cases.push({
                  conditions: [case_32],
                  body: gen_instruction_body(entries, 32),
              });
          }
          else
          {
              cases.push({
                  conditions: [case_16, case_32],
                  body: gen_instruction_body(entries, undefined),
              });
          }
      }

      const table = make_opcode_switch(cases);

      if(to_generate.interpreter)
      {
          write_table("interpreter", table);
      }

      // 0F-prefixed opcodes: separate tables for 16-bit and 32-bit operand size
      const cases0f_16 = [];
      const cases0f_32 = [];
      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          const entries = by_opcode0f[opcode];
          console.assert(entries && entries.length);

          const condition = `0x${hex(opcode, 2)}`;

          if(entries[0].os)
          {
              cases0f_16.push({
                  conditions: [condition],
                  body: gen_instruction_body(entries, 16),
              });
              cases0f_32.push({
                  conditions: [condition],
                  body: gen_instruction_body(entries, 32),
              });
          }
          else
          {
              // Size-independent: the same case object is shared by both tables
              const block = {
                  conditions: [condition],
                  body: gen_instruction_body(entries, undefined),
              };
              cases0f_16.push(block);
              cases0f_32.push(block);
          }
      }

      const table0f_16 = make_opcode_switch(cases0f_16);
      const table0f_32 = make_opcode_switch(cases0f_32);

      if(to_generate.interpreter0f_16)
      {
          write_table("interpreter0f_16", table0f_16);
      }

      if(to_generate.interpreter0f_32)
      {
          write_table("interpreter0f_32", table0f_32);
      }
  }