generate_analyzer.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. #!/usr/bin/env node
  2. "use strict";
  3. const fs = require("fs");
  4. const path = require("path");
  5. const x86_table = require("./x86_table");
  6. const rust_ast = require("./rust_ast");
  7. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
  8. const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");
  9. mkdirpSync(OUT_DIR);
  10. const table_arg = get_switch_value("--table");
  11. const gen_all = get_switch_exist("--all");
  12. const to_generate = {
  13. analyzer: gen_all || table_arg === "analyzer",
  14. analyzer0f_16: gen_all || table_arg === "analyzer0f_16",
  15. analyzer0f_32: gen_all || table_arg === "analyzer0f_32",
  16. };
  17. console.assert(
  18. Object.keys(to_generate).some(k => to_generate[k]),
  19. "Pass --table [analyzer|analyzer0f_16|analyzer0f_32] or --all to pick which tables to generate"
  20. );
  21. gen_table();
  22. function gen_read_imm_call(op, size_variant)
  23. {
  24. let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
  25. if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
  26. {
  27. if(op.imm8)
  28. {
  29. return "cpu.read_imm8()";
  30. }
  31. else if(op.imm8s)
  32. {
  33. return "cpu.read_imm8s()";
  34. }
  35. else
  36. {
  37. if(op.immaddr)
  38. {
  39. // immaddr: depends on address size
  40. return "cpu.read_moffs()";
  41. }
  42. else
  43. {
  44. console.assert(op.imm1632 || op.imm16 || op.imm32);
  45. if(op.imm1632 && size === 16 || op.imm16)
  46. {
  47. return "cpu.read_imm16()";
  48. }
  49. else
  50. {
  51. console.assert(op.imm1632 && size === 32 || op.imm32);
  52. return "cpu.read_imm32()";
  53. }
  54. }
  55. }
  56. }
  57. else
  58. {
  59. return undefined;
  60. }
  61. }
  62. function gen_call(name, args)
  63. {
  64. args = args || [];
  65. return `${name}(${args.join(", ")});`;
  66. }
  67. /*
  68. * Current naming scheme:
  69. * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
  70. */
  71. function make_instruction_name(encoding, size)
  72. {
  73. const suffix = encoding.os ? String(size) : "";
  74. const opcode_hex = hex(encoding.opcode & 0xFF, 2);
  75. const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
  76. const prefix = (encoding.opcode & 0xFF0000) === 0 ? "" : hex(encoding.opcode >> 16 & 0xFF, 2);
  77. const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
  78. return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
  79. }
  80. function gen_instruction_body(encodings, size)
  81. {
  82. const encoding = encodings[0];
  83. let has_66 = [];
  84. let has_F2 = [];
  85. let has_F3 = [];
  86. let no_prefix = [];
  87. for(let e of encodings)
  88. {
  89. if((e.opcode >>> 16) === 0x66) has_66.push(e);
  90. else if((e.opcode >>> 16) === 0xF2) has_F2.push(e);
  91. else if((e.opcode >>> 16) === 0xF3) has_F3.push(e);
  92. else no_prefix.push(e);
  93. }
  94. if(has_66.length || has_F2.length || has_F3.length)
  95. {
  96. console.assert((encoding.opcode & 0xFF00) === 0x0F00);
  97. }
  98. const code = [];
  99. if(encoding.e)
  100. {
  101. code.push("let modrm_byte = cpu.read_imm8();");
  102. }
  103. if(has_66.length || has_F2.length || has_F3.length)
  104. {
  105. const if_blocks = [];
  106. if(has_66.length) {
  107. const body = gen_instruction_body_after_prefix(has_66, size);
  108. if_blocks.push({ condition: "cpu.prefixes & ::prefix::PREFIX_66 != 0", body, });
  109. }
  110. if(has_F2.length) {
  111. const body = gen_instruction_body_after_prefix(has_F2, size);
  112. if_blocks.push({ condition: "cpu.prefixes & ::prefix::PREFIX_F2 != 0", body, });
  113. }
  114. if(has_F3.length) {
  115. const body = gen_instruction_body_after_prefix(has_F3, size);
  116. if_blocks.push({ condition: "cpu.prefixes & ::prefix::PREFIX_F3 != 0", body, });
  117. }
  118. const else_block = {
  119. body: gen_instruction_body_after_prefix(no_prefix, size),
  120. };
  121. return [].concat(
  122. code,
  123. {
  124. type: "if-else",
  125. if_blocks,
  126. else_block,
  127. }
  128. );
  129. }
  130. else {
  131. return [].concat(
  132. code,
  133. gen_instruction_body_after_prefix(encodings, size)
  134. );
  135. }
  136. }
  137. function gen_instruction_body_after_prefix(encodings, size)
  138. {
  139. const encoding = encodings[0];
  140. if(encoding.fixed_g !== undefined)
  141. {
  142. console.assert(encoding.e);
  143. // instruction with modrm byte where the middle 3 bits encode the instruction
  144. // group by opcode without prefix plus middle bits of modrm byte
  145. let cases = encodings.reduce((cases_by_opcode, case_) => {
  146. console.assert(typeof case_.fixed_g === "number");
  147. cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
  148. return cases_by_opcode;
  149. }, Object.create(null));
  150. cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
  151. return [
  152. {
  153. type: "switch",
  154. condition: "modrm_byte >> 3 & 7",
  155. cases: cases.map(case_ => {
  156. const fixed_g = case_.fixed_g;
  157. const body = gen_instruction_body_after_fixed_g(case_, size);
  158. return {
  159. conditions: [fixed_g],
  160. body,
  161. };
  162. }),
  163. default_case: {
  164. body: [
  165. "analysis.ty = ::analysis::AnalysisType::BlockBoundary;",
  166. "analysis.no_next_instruction = true;",
  167. ],
  168. }
  169. },
  170. ];
  171. }
  172. else {
  173. console.assert(encodings.length === 1);
  174. return gen_instruction_body_after_fixed_g(encodings[0], size);
  175. }
  176. }
  177. function gen_instruction_body_after_fixed_g(encoding, size)
  178. {
  179. const imm_read = gen_read_imm_call(encoding, size);
  180. const instruction_postfix = [];
  181. if(encoding.block_boundary && !encoding.jump_offset_imm)
  182. {
  183. instruction_postfix.push("analysis.ty = ::analysis::AnalysisType::BlockBoundary;");
  184. }
  185. if(encoding.no_next_instruction)
  186. {
  187. instruction_postfix.push("analysis.no_next_instruction = true;");
  188. }
  189. if(encoding.prefix)
  190. {
  191. const instruction_name = "::analysis::" + make_instruction_name(encoding, size) + "_analyze";
  192. const args = ["cpu", "analysis"];
  193. console.assert(!imm_read);
  194. return [].concat(
  195. gen_call(instruction_name, args),
  196. instruction_postfix
  197. );
  198. }
  199. else if(encoding.e)
  200. {
  201. // instruction with modrm byte where the middle 3 bits encode a register
  202. if(encoding.ignore_mod)
  203. {
  204. console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
  205. // Has modrm byte, but the 2 mod bits are ignored and both
  206. // operands are always registers (0f20-0f24)
  207. return instruction_postfix;
  208. }
  209. else
  210. {
  211. return [].concat(
  212. {
  213. type: "if-else",
  214. if_blocks: [{
  215. condition: "modrm_byte < 0xC0",
  216. body: [
  217. gen_call("::analysis::modrm_analyze", ["cpu", "modrm_byte"])
  218. ],
  219. }],
  220. },
  221. imm_read ? [imm_read + ";"] : [],
  222. instruction_postfix
  223. );
  224. }
  225. }
  226. else
  227. {
  228. // instruction without modrm byte or prefix
  229. const body = [];
  230. if(imm_read)
  231. {
  232. if(encoding.jump_offset_imm)
  233. {
  234. body.push("let jump_offset = " + imm_read + ";");
  235. if(encoding.conditional_jump)
  236. {
  237. console.assert((encoding.opcode & ~0xF) === 0x70 || (encoding.opcode & ~0xF) === 0x0F80);
  238. const condition_index = encoding.opcode & 0xF;
  239. body.push(`analysis.ty = ::analysis::AnalysisType::Jump { offset: jump_offset as i32, condition: Some(${condition_index}), is_32: cpu.osize_32() };`);
  240. }
  241. else
  242. {
  243. body.push(`analysis.ty = ::analysis::AnalysisType::Jump { offset: jump_offset as i32, condition: None, is_32: cpu.osize_32() };`);
  244. }
  245. }
  246. else
  247. {
  248. body.push(imm_read + ";");
  249. }
  250. }
  251. if(encoding.extra_imm16)
  252. {
  253. console.assert(imm_read);
  254. body.push(gen_call("cpu.read_imm16"));
  255. }
  256. else if(encoding.extra_imm8)
  257. {
  258. console.assert(imm_read);
  259. body.push(gen_call("cpu.read_imm8"));
  260. }
  261. return [].concat(
  262. body,
  263. instruction_postfix
  264. );
  265. }
  266. }
  267. function gen_table()
  268. {
  269. let by_opcode = Object.create(null);
  270. let by_opcode0f = Object.create(null);
  271. for(let o of x86_table)
  272. {
  273. let opcode = o.opcode;
  274. if(opcode >= 0x100)
  275. {
  276. if((opcode & 0xFF00) === 0x0F00)
  277. {
  278. opcode &= 0xFF;
  279. by_opcode0f[opcode] = by_opcode0f[opcode] || [];
  280. by_opcode0f[opcode].push(o);
  281. }
  282. }
  283. else
  284. {
  285. by_opcode[opcode] = by_opcode[opcode] || [];
  286. by_opcode[opcode].push(o);
  287. }
  288. }
  289. let cases = [];
  290. for(let opcode = 0; opcode < 0x100; opcode++)
  291. {
  292. let encoding = by_opcode[opcode];
  293. console.assert(encoding && encoding.length);
  294. let opcode_hex = hex(opcode, 2);
  295. let opcode_high_hex = hex(opcode | 0x100, 2);
  296. if(encoding[0].os)
  297. {
  298. cases.push({
  299. conditions: [`0x${opcode_hex}`],
  300. body: gen_instruction_body(encoding, 16),
  301. });
  302. cases.push({
  303. conditions: [`0x${opcode_high_hex}`],
  304. body: gen_instruction_body(encoding, 32),
  305. });
  306. }
  307. else
  308. {
  309. cases.push({
  310. conditions: [`0x${opcode_hex}`, `0x${opcode_high_hex}`],
  311. body: gen_instruction_body(encoding, undefined),
  312. });
  313. }
  314. }
  315. const table = {
  316. type: "switch",
  317. condition: "opcode",
  318. cases,
  319. default_case: {
  320. body: ["dbg_assert!(false);"]
  321. },
  322. };
  323. if(to_generate.analyzer)
  324. {
  325. const code = [
  326. "#[cfg_attr(rustfmt, rustfmt_skip)]",
  327. "pub fn analyzer(opcode: u32, cpu: &mut ::cpu_context::CpuContext, analysis: &mut ::analysis::Analysis) {",
  328. table,
  329. "}",
  330. ];
  331. finalize_table_rust(
  332. OUT_DIR,
  333. "analyzer.rs",
  334. rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
  335. );
  336. }
  337. const cases0f_16 = [];
  338. const cases0f_32 = [];
  339. for(let opcode = 0; opcode < 0x100; opcode++)
  340. {
  341. let encoding = by_opcode0f[opcode];
  342. console.assert(encoding && encoding.length);
  343. let opcode_hex = hex(opcode, 2);
  344. if(encoding[0].os)
  345. {
  346. cases0f_16.push({
  347. conditions: [`0x${opcode_hex}`],
  348. body: gen_instruction_body(encoding, 16),
  349. });
  350. cases0f_32.push({
  351. conditions: [`0x${opcode_hex}`],
  352. body: gen_instruction_body(encoding, 32),
  353. });
  354. }
  355. else
  356. {
  357. let block = {
  358. conditions: [`0x${opcode_hex}`],
  359. body: gen_instruction_body(encoding, undefined),
  360. };
  361. cases0f_16.push(block);
  362. cases0f_32.push(block);
  363. }
  364. }
  365. const table0f_16 = {
  366. type: "switch",
  367. condition: "opcode",
  368. cases: cases0f_16,
  369. default_case: {
  370. body: ["dbg_assert!(false);"]
  371. },
  372. };
  373. const table0f_32 = {
  374. type: "switch",
  375. condition: "opcode",
  376. cases: cases0f_32,
  377. default_case: {
  378. body: ["dbg_assert!(false);"]
  379. },
  380. };
  381. if(to_generate.analyzer0f_16)
  382. {
  383. const code = [
  384. "#![allow(unused)]",
  385. "#[cfg_attr(rustfmt, rustfmt_skip)]",
  386. "pub fn analyzer(opcode: u8, cpu: &mut ::cpu_context::CpuContext, analysis: &mut ::analysis::Analysis) {",
  387. table0f_16,
  388. "}"
  389. ];
  390. finalize_table_rust(
  391. OUT_DIR,
  392. "analyzer0f_16.rs",
  393. rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
  394. );
  395. }
  396. if(to_generate.analyzer0f_32)
  397. {
  398. const code = [
  399. "#![allow(unused)]",
  400. "#[cfg_attr(rustfmt, rustfmt_skip)]",
  401. "pub fn analyzer(opcode: u8, cpu: &mut ::cpu_context::CpuContext, analysis: &mut ::analysis::Analysis) {",
  402. table0f_32,
  403. "}"
  404. ];
  405. finalize_table_rust(
  406. OUT_DIR,
  407. "analyzer0f_32.rs",
  408. rust_ast.print_syntax_tree([].concat(code)).join("\n") + "\n"
  409. );
  410. }
  411. }