generate_interpreter.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. #!/usr/bin/env node
  2. "use strict";
  3. const fs = require("fs");
  4. const encodings = require("./x86_table");
  5. const c_ast = require("./c_ast");
  6. const { hex } = require("./util");
  7. gen_table();
  /**
   * Pick the C expression that reads the immediate operand of an encoding.
   *
   * @param {Object} op - Encoding entry; the flags imm8, imm8s, imm16,
   *     imm1632, imm32 and immaddr are inspected, plus os and opcode.
   * @param {number|undefined} size_variant - Operand size used when op.os is
   *     set or the opcode is odd; otherwise a size of 8 is assumed.
   * @returns {string|undefined} Source text of the read call, or undefined
   *     when the encoding declares no immediate.
   */
  function gen_read_imm_call(op, size_variant)
  {
      const has_immediate =
          op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr;

      if(!has_immediate)
      {
          return undefined;
      }

      if(op.imm8)
      {
          return "read_imm8()";
      }

      if(op.imm8s)
      {
          return "read_imm8s()";
      }

      if(op.immaddr)
      {
          // immaddr: depends on address size
          return "read_moffs()";
      }

      // Only the 16/32-bit immediates are left at this point
      console.assert(op.imm1632 || op.imm16 || op.imm32);

      const uses_size_variant = op.os || op.opcode % 2 === 1;
      const operand_size = uses_size_variant ? size_variant : 8;

      if(op.imm16 || (op.imm1632 && operand_size === 16))
      {
          return "read_imm16()";
      }

      console.assert(op.imm32 || (op.imm1632 && operand_size === 32));
      return "read_imm32s()";
  }
  /**
   * Render a C call statement as text.
   *
   * @param {string} name - Function name to call.
   * @param {string[]} [args] - Argument expressions; a missing or falsy value
   *     is treated as an empty argument list.
   * @returns {string} `name(arg1, arg2, ...);` including the semicolon.
   */
  function gen_call(name, args)
  {
      const argument_list = args ? args.join(", ") : "";
      return `${name}(${argument_list});`;
  }
  /**
   * Build an if-else node that calls `<name>_mem` when `modrm_byte < 0xC0`
   * and `<name>_reg` otherwise.
   *
   * @param {string} name - Instruction name without the _mem/_reg suffix.
   * @param {string|undefined} modrm_resolve_prefix - Optional statement placed
   *     before the `_mem` call; skipped when falsy.
   * @param {string[]} mem_args - Argument expressions for the `_mem` call.
   * @param {string[]} reg_args - Argument expressions for the `_reg` call.
   * @returns {Object} Node of type "if-else" with one if block and an else block.
   */
  function gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)
  {
      const mem_body = [];
      if(modrm_resolve_prefix)
      {
          mem_body.push(modrm_resolve_prefix);
      }
      mem_body.push(gen_call(`${name}_mem`, mem_args));

      const reg_body = [gen_call(`${name}_reg`, reg_args)];

      return {
          type: "if-else",
          if_blocks: [
              {
                  condition: "modrm_byte < 0xC0",
                  body: mem_body,
              },
          ],
          else_block: {
              body: reg_body,
          },
      };
  }
  /*
   * Current naming scheme:
   * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
   *
   * This function produces everything up to and including the optional
   * fixed_g part; the _mem/_reg suffix is appended by
   * gen_modrm_mem_reg_split.
   *
   * encoding:       entry whose opcode, os and fixed_g fields are read
   * size:           appended right after "instr" only when encoding.os is set
   * prefix_variant: prefix byte rendered via hex(), or undefined for none
   */
  function make_instruction_name(encoding, size, prefix_variant)
  {
      let size_part = "";
      if(encoding.os)
      {
          size_part = String(size);
      }

      const low_byte = hex(encoding.opcode & 0xFF, 2);

      // Two-byte opcodes are recognised by 0x0F in bits 8-15
      const is_0f = (encoding.opcode & 0xFF00) === 0x0F00;
      const escape_part = is_0f ? "0F" : "";

      let prefix_part = "";
      if(prefix_variant !== undefined)
      {
          prefix_part = hex(prefix_variant, 2);
      }

      let group_part = "";
      if(encoding.fixed_g !== undefined)
      {
          group_part = `_${encoding.fixed_g}`;
      }

      return `instr${size_part}_${prefix_part}${escape_part}${low_byte}${group_part}`;
  }
  /**
   * Build the if-else node that dispatches on the runtime prefix bits to the
   * 66/F2/F3-prefixed variant of an instruction, falling back to the
   * unprefixed variant in the else block.
   *
   * Private helper of gen_instruction_body. Written as a function declaration
   * (hoisted) because gen_table() is invoked at the top of the file.
   *
   * @param {Object} encoding - Encoding used to derive the instruction names.
   * @param {number|undefined} size - Operand size passed to make_instruction_name.
   * @param {{prefix: number, condition: string}[]} prefix_variants - Prefixed
   *     variants to emit, in output order.
   * @param {string|undefined} modrm_resolve_prefix - Forwarded to
   *     gen_modrm_mem_reg_split.
   * @param {string[]} mem_args - Arguments of the generated `_mem` calls.
   * @param {string[]} reg_args - Arguments of the generated `_reg` calls.
   * @returns {Object} Node of type "if-else".
   */
  function gen_prefix_dispatch(encoding, size, prefix_variants, modrm_resolve_prefix, mem_args, reg_args)
  {
      const if_blocks = prefix_variants.map(variant => {
          const name = make_instruction_name(encoding, size, variant.prefix);
          return {
              condition: variant.condition,
              body: [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)],
          };
      });

      const unprefixed_name = make_instruction_name(encoding, size);
      const else_block = {
          body: [gen_modrm_mem_reg_split(unprefixed_name, modrm_resolve_prefix, mem_args, reg_args)],
      };

      return {
          type: "if-else",
          if_blocks,
          else_block,
      };
  }

  /**
   * Generate the body (a list of statements and nested nodes) of one opcode
   * case.
   *
   * Four shapes are produced, depending on the first encoding of the group:
   *   1. fixed_g set: a switch over bits 3-5 of the modrm byte
   *   2. any 66/F2/F3 variant present: modrm byte read plus prefix dispatch
   *   3. encoding.e set: modrm byte read, mem/reg split (or register-only
   *      call when ignore_mod is set)
   *   4. otherwise: a single call, with immediates as arguments
   *
   * @param {Object[]} encodings - All encodings sharing one opcode byte; must
   *     be non-empty.
   * @param {number|undefined} size - 16, 32, or undefined for encodings that
   *     do not depend on the operand size.
   * @returns {Array<string|Object>} Statements and nodes for the case body.
   */
  function gen_instruction_body(encodings, size)
  {
      const encoding = encodings[0];

      // The prefix of an encoding lives above bit 15 of its opcode value
      const seen_prefixes = new Set();
      for(const e of encodings)
      {
          seen_prefixes.add(e.opcode >>> 16);
      }

      // Kept local rather than module-level: gen_table() runs at the top of
      // the file, before a const declared down here would be initialised.
      const prefix_variants = [
          { prefix: 0x66, condition: "prefixes_ & PREFIX_66" },
          { prefix: 0xF2, condition: "prefixes_ & PREFIX_F2" },
          { prefix: 0xF3, condition: "prefixes_ & PREFIX_F3" },
      ].filter(variant => seen_prefixes.has(variant.prefix));

      const has_prefix_variants = prefix_variants.length > 0;

      if(has_prefix_variants)
      {
          console.assert((encoding.opcode & 0xFF00) === 0x0F00);
      }

      if(encoding.fixed_g !== undefined)
      {
          // instruction with modrm byte where the middle 3 bits encode the instruction

          // group by opcode without prefix plus middle bits of modrm byte;
          // a later encoding with the same key replaces an earlier one
          const cases_by_key = Object.create(null);
          for(const case_ of encodings)
          {
              console.assert(typeof case_.fixed_g === "number");
              cases_by_key[(case_.opcode & 0xFFFF) | (case_.fixed_g << 16)] = case_;
          }
          const cases = Object.values(cases_by_key).sort((e1, e2) => e1.fixed_g - e2.fixed_g);

          return [
              "int32_t modrm_byte = read_imm8();",
              {
                  type: "switch",
                  condition: "modrm_byte >> 3 & 7",
                  cases: cases.map(case_ => {
                      const fixed_g = case_.fixed_g;
                      const instruction_name = make_instruction_name(case_, size, undefined);

                      const modrm_resolve_prefix = case_.requires_prefix_call ?
                          gen_call(instruction_name + "_mem_pre") :
                          undefined;

                      const mem_args = ["modrm_resolve(modrm_byte)"];
                      const reg_args = ["modrm_byte & 7"];

                      const imm_read = gen_read_imm_call(case_, size);
                      if(imm_read)
                      {
                          mem_args.push(imm_read);
                          reg_args.push(imm_read);
                      }

                      if(has_prefix_variants)
                      {
                          return {
                              conditions: [fixed_g],
                              body: [
                                  "int32_t prefixes_ = *prefixes;",
                                  gen_prefix_dispatch(case_, size, prefix_variants, modrm_resolve_prefix, mem_args, reg_args),
                              ],
                          };
                      }

                      return {
                          conditions: [fixed_g],
                          body: [gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args)],
                      };
                  }),
                  default_case: {
                      body: [
                          "assert(false);",
                          "trigger_ud();",
                      ],
                  },
              },
          ];
      }

      if(has_prefix_variants)
      {
          // instruction with modrm byte (middle 3 bits passed on as an
          // argument) that has 66/F2/F3-prefixed variants
          console.assert(encoding.e);
          console.assert(!encoding.ignore_mod);
          console.assert(!encoding.requires_prefix_call, "Unexpected instruction (66/f2/f3 with prefix call)");

          const imm_read = gen_read_imm_call(encoding, size);

          const mem_args = ["modrm_resolve(modrm_byte)", "modrm_byte >> 3 & 7"];
          const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];

          if(imm_read)
          {
              mem_args.push(imm_read);
              reg_args.push(imm_read);
          }

          return [
              "int32_t modrm_byte = read_imm8();",
              "int32_t prefixes_ = *prefixes;",
              gen_prefix_dispatch(encoding, size, prefix_variants, undefined, mem_args, reg_args),
          ];
      }

      if(encoding.e)
      {
          // instruction with modrm byte where the middle 3 bits encode a register
          console.assert(encodings.length === 1);

          const instruction_name = make_instruction_name(encoding, size);

          const modrm_resolve_prefix = encoding.requires_prefix_call ?
              gen_call(instruction_name + "_mem_pre") :
              undefined;

          const imm_read = gen_read_imm_call(encoding, size);

          if(encoding.ignore_mod)
          {
              console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
              console.assert(!modrm_resolve_prefix, "Unexpected instruction (ignore mod with prefix call)");

              // Has modrm byte, but the 2 mod bits are ignored and both
              // operands are always registers (0f20-0f24)
              return [
                  "int32_t modrm_byte = read_imm8();",
                  gen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
              ];
          }

          const mem_args = ["modrm_resolve(modrm_byte)", "modrm_byte >> 3 & 7"];
          const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];

          if(imm_read)
          {
              mem_args.push(imm_read);
              reg_args.push(imm_read);
          }

          return [
              "int32_t modrm_byte = read_imm8();",
              gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args),
          ];
      }

      // instruction without modrm byte or prefix
      const imm_read = gen_read_imm_call(encoding, size);
      const instruction_name = make_instruction_name(encoding, size);

      const args = [];

      if(imm_read)
      {
          args.push(imm_read);
      }

      // An extra immediate is only expected after a primary one
      if(encoding.extra_imm16)
      {
          console.assert(imm_read);
          args.push("read_imm16()");
      }
      else if(encoding.extra_imm8)
      {
          console.assert(imm_read);
          args.push("read_imm8()");
      }

      return [gen_call(instruction_name, args)];
  }
  /**
   * Generate the opcode dispatch tables and write them to disk.
   *
   * Three files are written, in this order:
   *   /tmp/table      - one-byte opcodes; cases labelled `0xNN` and
   *                     `0xNN|0x100` (separate bodies for sizes 16 and 32
   *                     when the first encoding of the opcode has `os` set)
   *   /tmp/table0f_16 - 0F-prefixed opcodes, bodies generated with size 16
   *   /tmp/table0f_32 - 0F-prefixed opcodes, bodies generated with size 32
   *
   * 0F opcodes without any encoding get a placeholder entry that carries only
   * the opcode.
   */
  function gen_table()
  {
      // Wrap a list of cases into a switch over `opcode`
      const make_opcode_switch = cases => ({
          type: "switch",
          condition: "opcode",
          cases,
          default_case: {
              body: ["assert(false);"]
          },
      });

      // Print one table and write it, newline-terminated, to `path`
      const write_table = (path, table) => {
          fs.writeFileSync(path, c_ast.print_syntax_tree([table]).join("\n") + "\n");
      };

      const add_to_group = (groups, key, entry) => {
          if(!groups[key])
          {
              groups[key] = [];
          }
          groups[key].push(entry);
      };

      const by_opcode = Object.create(null);
      const by_opcode0f = Object.create(null);

      for(const o of encodings)
      {
          if(o.opcode >= 0x100)
          {
              // Multi-byte opcodes other than 0F xx are dropped here
              if((o.opcode & 0xFF00) === 0x0F00)
              {
                  add_to_group(by_opcode0f, o.opcode & 0xFF, o);
              }
          }
          else
          {
              add_to_group(by_opcode, o.opcode, o);
          }
      }

      const cases = [];

      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          const group = by_opcode[opcode];
          console.assert(group && group.length);

          const label = `0x${hex(opcode, 2)}`;

          if(group[0].os)
          {
              cases.push({
                  conditions: [label],
                  body: gen_instruction_body(group, 16),
              });
              cases.push({
                  conditions: [`${label}|0x100`],
                  body: gen_instruction_body(group, 32),
              });
          }
          else
          {
              cases.push({
                  conditions: [label, `${label}|0x100`],
                  body: gen_instruction_body(group, undefined),
              });
          }
      }

      write_table("/tmp/table", make_opcode_switch(cases));

      const cases0f_16 = [];
      const cases0f_32 = [];

      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          // Placeholder entry for opcodes that have no encoding
          const group = by_opcode0f[opcode] || [{ opcode: 0x0F00 | opcode }];
          console.assert(group && group.length);

          const label = `0x${hex(opcode, 2)}`;

          if(group[0].os)
          {
              cases0f_16.push({
                  conditions: [label],
                  body: gen_instruction_body(group, 16),
              });
              cases0f_32.push({
                  conditions: [label],
                  body: gen_instruction_body(group, 32),
              });
          }
          else
          {
              // The same case object is shared by both tables
              const shared_case = {
                  conditions: [label],
                  body: gen_instruction_body(group, undefined),
              };
              cases0f_16.push(shared_case);
              cases0f_32.push(shared_case);
          }
      }

      write_table("/tmp/table0f_16", make_opcode_switch(cases0f_16));
      write_table("/tmp/table0f_32", make_opcode_switch(cases0f_32));
  }