generate_analyzer.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. #!/usr/bin/env node
  2. "use strict";
  3. const fs = require("fs");
  4. const path = require("path");
  5. const encodings = require("./x86_table");
  6. const c_ast = require("./c_ast");
  7. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
  8. const APPEND_NONFAULTING_FLAG = "analysis.flags |= JIT_INSTR_NONFAULTING_FLAG;";
  9. const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
  10. mkdirpSync(OUT_DIR);
  11. const table_arg = get_switch_value("--table");
  12. const gen_all = get_switch_exist("--all");
  13. const to_generate = {
  14. analyzer: gen_all || table_arg === "analyzer",
  15. analyzer0f_16: gen_all || table_arg === "analyzer0f_16",
  16. analyzer0f_32: gen_all || table_arg === "analyzer0f_32",
  17. };
  18. console.assert(
  19. Object.keys(to_generate).some(k => to_generate[k]),
  20. "Pass --table [analyzer|analyzer0f_16|analyzer0f_32] or --all to pick which tables to generate"
  21. );
  22. gen_table();
  23. function gen_read_imm_call(op, size_variant)
  24. {
  25. let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;
  26. if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
  27. {
  28. if(op.imm8)
  29. {
  30. return "read_imm8()";
  31. }
  32. else if(op.imm8s)
  33. {
  34. return "read_imm8s()";
  35. }
  36. else
  37. {
  38. if(op.immaddr)
  39. {
  40. // immaddr: depends on address size
  41. return "read_moffs()";
  42. }
  43. else
  44. {
  45. console.assert(op.imm1632 || op.imm16 || op.imm32);
  46. if(op.imm1632 && size === 16 || op.imm16)
  47. {
  48. return "read_imm16()";
  49. }
  50. else
  51. {
  52. console.assert(op.imm1632 && size === 32 || op.imm32);
  53. return "read_imm32s()";
  54. }
  55. }
  56. }
  57. }
  58. else
  59. {
  60. return undefined;
  61. }
  62. }
  63. function gen_call(name, args)
  64. {
  65. args = args || [];
  66. return `${name}(${args.join(", ")});`;
  67. }
  68. function gen_codegen_call(args)
  69. {
  70. return args.map(arg => arg + ";");
  71. }
  72. function gen_codegen_call_modrm(args)
  73. {
  74. args = args.map(arg => arg + ";");
  75. return [].concat(gen_call("modrm_skip", ["modrm_byte"]), args);
  76. }
  77. function gen_modrm_mem_reg_split(mem_args, reg_args, postfixes={})
  78. {
  79. const { mem_postfix=[], reg_postfix=[] } = postfixes;
  80. return {
  81. type: "if-else",
  82. if_blocks: [{
  83. condition: "modrm_byte < 0xC0",
  84. body: []
  85. .concat(gen_codegen_call_modrm(mem_args))
  86. .concat(mem_postfix),
  87. }],
  88. else_block: {
  89. body: gen_codegen_call(reg_args).concat(reg_postfix),
  90. },
  91. };
  92. }
  93. /*
  94. * Current naming scheme:
  95. * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
  96. */
  97. function make_instruction_name(encoding, size, prefix_variant)
  98. {
  99. const suffix = encoding.os ? String(size) : "";
  100. const opcode_hex = hex(encoding.opcode & 0xFF, 2);
  101. const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
  102. const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
  103. const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
  104. return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
  105. }
  106. function get_nonfaulting_mem_reg_postfix(encoding)
  107. {
  108. const lea_special_case = encoding.opcode === 0x8D;
  109. const mem_postfix = (encoding.nonfaulting && lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
  110. const reg_postfix = (encoding.nonfaulting && !lea_special_case) ? [APPEND_NONFAULTING_FLAG] : [];
  111. return {
  112. mem_postfix,
  113. reg_postfix,
  114. };
  115. }
  116. function gen_instruction_body(encodings, size)
  117. {
  118. const encoding = encodings[0];
  119. let has_66 = false;
  120. let has_F2 = false;
  121. let has_F3 = false;
  122. for(let e of encodings)
  123. {
  124. if((e.opcode >>> 16) === 0x66) has_66 = true;
  125. if((e.opcode >>> 16) === 0xF2) has_F2 = true;
  126. if((e.opcode >>> 16) === 0xF3) has_F3 = true;
  127. }
  128. console.assert(
  129. !encodings.some(e => e.nonfaulting && e.block_boundary),
  130. "Unsupported: instruction cannot be both a jump and nonfaulting. Opcode: 0x" + hex(encoding.opcode)
  131. );
  132. if(has_66 || has_F2 || has_F3)
  133. {
  134. console.assert((encoding.opcode & 0xFF00) === 0x0F00);
  135. }
  136. const instruction_postfix = encoding.block_boundary ? ["analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
  137. if(encoding.fixed_g !== undefined)
  138. {
  139. // instruction with modrm byte where the middle 3 bits encode the instruction
  140. // group by opcode without prefix plus middle bits of modrm byte
  141. let cases = encodings.reduce((cases_by_opcode, case_) => {
  142. console.assert(typeof case_.fixed_g === "number");
  143. cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
  144. return cases_by_opcode;
  145. }, Object.create(null));
  146. cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
  147. return [
  148. "int32_t modrm_byte = read_imm8();",
  149. {
  150. type: "switch",
  151. condition: "modrm_byte >> 3 & 7",
  152. cases: cases.map(case_ => {
  153. const fixed_g = case_.fixed_g;
  154. const instruction_postfix = case_.block_boundary ? ["analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;"] : [];
  155. const mem_args = [];
  156. const reg_args = [];
  157. const imm_read = gen_read_imm_call(case_, size);
  158. if(imm_read)
  159. {
  160. mem_args.push(imm_read);
  161. reg_args.push(imm_read);
  162. }
  163. if(has_66 || has_F2 || has_F3)
  164. {
  165. const if_blocks = [];
  166. if(has_66) {
  167. const name = make_instruction_name(case_, size, 0x66);
  168. const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
  169. if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
  170. }
  171. if(has_F2) {
  172. const name = make_instruction_name(case_, size, 0xF2);
  173. const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
  174. if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
  175. }
  176. if(has_F3) {
  177. const name = make_instruction_name(case_, size, 0xF3);
  178. const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
  179. if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
  180. }
  181. const else_block = {
  182. body: [
  183. gen_modrm_mem_reg_split(
  184. mem_args,
  185. reg_args,
  186. {}
  187. )
  188. ],
  189. };
  190. return {
  191. conditions: [fixed_g],
  192. body: [
  193. "int32_t prefixes_ = *prefixes;",
  194. {
  195. type: "if-else",
  196. if_blocks,
  197. else_block,
  198. },
  199. ].concat(instruction_postfix),
  200. };
  201. }
  202. else
  203. {
  204. const body = [
  205. gen_modrm_mem_reg_split(
  206. mem_args,
  207. reg_args,
  208. get_nonfaulting_mem_reg_postfix(case_)
  209. )
  210. ].concat(instruction_postfix);
  211. return {
  212. conditions: [fixed_g],
  213. body,
  214. };
  215. }
  216. }),
  217. default_case: {
  218. body: [
  219. "assert(false);",
  220. "analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;",
  221. ],
  222. }
  223. },
  224. ].concat(instruction_postfix);
  225. }
  226. else if(has_66 || has_F2 || has_F3)
  227. {
  228. // instruction without modrm byte but with prefix
  229. console.assert(encoding.e);
  230. console.assert(!encoding.ignore_mod);
  231. const imm_read = gen_read_imm_call(encoding, size);
  232. const mem_args = [];
  233. const reg_args = [];
  234. if(imm_read)
  235. {
  236. mem_args.push(imm_read);
  237. reg_args.push(imm_read);
  238. }
  239. const if_blocks = [];
  240. if(has_66) {
  241. const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
  242. if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
  243. }
  244. if(has_F2) {
  245. const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
  246. if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
  247. }
  248. if(has_F3) {
  249. const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
  250. if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
  251. }
  252. const else_block = {
  253. body: [
  254. gen_modrm_mem_reg_split(
  255. mem_args,
  256. reg_args,
  257. {}
  258. )
  259. ],
  260. };
  261. return [
  262. "int32_t modrm_byte = read_imm8();",
  263. "int32_t prefixes_ = *prefixes;",
  264. {
  265. type: "if-else",
  266. if_blocks,
  267. else_block,
  268. }
  269. ].concat(instruction_postfix);
  270. }
  271. else if(encoding.fixed_g === undefined && encoding.e)
  272. {
  273. // instruction with modrm byte where the middle 3 bits encode a register
  274. console.assert(encodings.length === 1);
  275. const imm_read = gen_read_imm_call(encoding, size);
  276. if(encoding.ignore_mod)
  277. {
  278. console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
  279. // Has modrm byte, but the 2 mod bits are ignored and both
  280. // operands are always registers (0f20-0f24)
  281. if(encoding.nonfaulting)
  282. {
  283. instruction_postfix.push(APPEND_NONFAULTING_FLAG);
  284. }
  285. return ["int32_t modrm_byte = read_imm8();"]
  286. .concat(gen_codegen_call([]))
  287. .concat(instruction_postfix);
  288. }
  289. else
  290. {
  291. const mem_args = [];
  292. const reg_args = [];
  293. if(imm_read)
  294. {
  295. mem_args.push(imm_read);
  296. reg_args.push(imm_read);
  297. }
  298. return [
  299. "int32_t modrm_byte = read_imm8();",
  300. gen_modrm_mem_reg_split(
  301. mem_args,
  302. reg_args,
  303. get_nonfaulting_mem_reg_postfix(encoding)
  304. ),
  305. ].concat(instruction_postfix);
  306. }
  307. }
  308. else if(encoding.prefix)
  309. {
  310. console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");
  311. const instruction_name = make_instruction_name(encoding, size) + "_analyze";
  312. const imm_read = gen_read_imm_call(encoding, size);
  313. const args = [];
  314. if(imm_read)
  315. {
  316. args.push(imm_read);
  317. }
  318. const call_prefix = encoding.prefix ? "return " : "";
  319. // Prefix calls can add to the return flags
  320. return [call_prefix + gen_call(instruction_name, args)].concat(instruction_postfix);
  321. }
  322. else
  323. {
  324. // instruction without modrm byte or prefix
  325. const imm_read = gen_read_imm_call(encoding, size);
  326. const args = [];
  327. if(imm_read)
  328. {
  329. if(encoding.jump_offset_imm)
  330. {
  331. args.push("int32_t jump_offset = " + imm_read + ";");
  332. args.push(`
  333. analysis.jump_target = is_osize_32() ?
  334. *instruction_pointer + jump_offset :
  335. get_seg_cs() + ((*instruction_pointer - get_seg_cs() + jump_offset) & 0xFFFF);`);
  336. }
  337. else
  338. {
  339. args.push(imm_read + ";");
  340. }
  341. }
  342. if(encoding.extra_imm16)
  343. {
  344. console.assert(imm_read);
  345. args.push(gen_call("read_imm16"));
  346. }
  347. else if(encoding.extra_imm8)
  348. {
  349. console.assert(imm_read);
  350. args.push(gen_call("read_imm8"));
  351. }
  352. if(encoding.nonfaulting)
  353. {
  354. instruction_postfix.push(APPEND_NONFAULTING_FLAG);
  355. }
  356. if(encoding.conditional_jump)
  357. {
  358. instruction_postfix.push("analysis.condition_index = " + (encoding.opcode & 0xF) + ";");
  359. }
  360. return args.concat(instruction_postfix);
  361. }
  362. }
  363. function gen_table()
  364. {
  365. let by_opcode = Object.create(null);
  366. let by_opcode0f = Object.create(null);
  367. for(let o of encodings)
  368. {
  369. let opcode = o.opcode;
  370. if(opcode >= 0x100)
  371. {
  372. if((opcode & 0xFF00) === 0x0F00)
  373. {
  374. opcode &= 0xFF;
  375. by_opcode0f[opcode] = by_opcode0f[opcode] || [];
  376. by_opcode0f[opcode].push(o);
  377. }
  378. }
  379. else
  380. {
  381. by_opcode[opcode] = by_opcode[opcode] || [];
  382. by_opcode[opcode].push(o);
  383. }
  384. }
  385. let cases = [];
  386. for(let opcode = 0; opcode < 0x100; opcode++)
  387. {
  388. let encoding = by_opcode[opcode];
  389. console.assert(encoding && encoding.length);
  390. let opcode_hex = hex(opcode, 2);
  391. if(encoding[0].os)
  392. {
  393. cases.push({
  394. conditions: [`0x${opcode_hex}`],
  395. body: gen_instruction_body(encoding, 16),
  396. });
  397. cases.push({
  398. conditions: [`0x${opcode_hex}|0x100`],
  399. body: gen_instruction_body(encoding, 32),
  400. });
  401. }
  402. else
  403. {
  404. cases.push({
  405. conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
  406. body: gen_instruction_body(encoding, undefined),
  407. });
  408. }
  409. }
  410. const table = {
  411. type: "switch",
  412. condition: "opcode",
  413. cases,
  414. default_case: {
  415. body: ["assert(false);"]
  416. },
  417. };
  418. if(to_generate.analyzer)
  419. {
  420. finalize_table(
  421. OUT_DIR,
  422. "analyzer",
  423. c_ast.print_syntax_tree([table]).join("\n") + "\n"
  424. );
  425. }
  426. const cases0f_16 = [];
  427. const cases0f_32 = [];
  428. for(let opcode = 0; opcode < 0x100; opcode++)
  429. {
  430. let encoding = by_opcode0f[opcode];
  431. if(!encoding)
  432. {
  433. encoding = [
  434. {
  435. opcode: 0x0F00 | opcode,
  436. },
  437. ];
  438. }
  439. console.assert(encoding && encoding.length);
  440. let opcode_hex = hex(opcode, 2);
  441. if(encoding[0].os)
  442. {
  443. cases0f_16.push({
  444. conditions: [`0x${opcode_hex}`],
  445. body: gen_instruction_body(encoding, 16),
  446. });
  447. cases0f_32.push({
  448. conditions: [`0x${opcode_hex}`],
  449. body: gen_instruction_body(encoding, 32),
  450. });
  451. }
  452. else
  453. {
  454. let block = {
  455. conditions: [`0x${opcode_hex}`],
  456. body: gen_instruction_body(encoding, undefined),
  457. };
  458. cases0f_16.push(block);
  459. cases0f_32.push(block);
  460. }
  461. }
  462. const table0f_16 = {
  463. type: "switch",
  464. condition: "opcode",
  465. cases: cases0f_16,
  466. default_case: {
  467. body: ["assert(false);"]
  468. },
  469. };
  470. const table0f_32 = {
  471. type: "switch",
  472. condition: "opcode",
  473. cases: cases0f_32,
  474. default_case: {
  475. body: ["assert(false);"]
  476. },
  477. };
  478. if(to_generate.analyzer0f_16)
  479. {
  480. finalize_table(
  481. OUT_DIR,
  482. "analyzer0f_16",
  483. c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n"
  484. );
  485. }
  486. if(to_generate.analyzer0f_32)
  487. {
  488. finalize_table(
  489. OUT_DIR,
  490. "analyzer0f_32",
  491. c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n"
  492. );
  493. }
  494. }