generate_analyzer.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. #!/usr/bin/env node
  2. "use strict";
  3. const fs = require("fs");
  4. const path = require("path");
  5. const encodings = require("./x86_table");
  6. const c_ast = require("./c_ast");
  7. const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
  // Statement appended to generated instruction bodies to flag them as non-faulting.
  const APPEND_NONFAULTING_FLAG = "analysis.flags |= JIT_INSTR_NONFAULTING_FLAG;";

  // Output directory: the --output-dir switch, falling back to ../build relative to this script.
  const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
  mkdirpSync(OUT_DIR);

  const table_arg = get_switch_value("--table");
  const gen_all = get_switch_exist("--all");

  // Per-table selection: a table is generated when --all is given or --table names it.
  const to_generate = {};
  for(const table_name of ["analyzer", "analyzer0f_16", "analyzer0f_32"])
  {
      to_generate[table_name] = gen_all || table_arg === table_name;
  }

  // console.assert only reports a failed check; gen_table() below still runs.
  console.assert(
      Object.values(to_generate).some(Boolean),
      "Pass --table [analyzer|analyzer0f_16|analyzer0f_32] or --all to pick which tables to generate"
  );

  gen_table();
  /**
   * Choose the immediate-read call expression for an encoding.
   *
   * @param {Object} op Encoding entry; its imm8, imm8s, imm16, imm1632, imm32,
   *     immaddr, os and opcode fields are consulted.
   * @param {number|undefined} size_variant Operand size the caller is generating for.
   * @returns {string|undefined} The call expression (no trailing semicolon),
   *     or undefined when none of the immediate flags is set.
   */
  function gen_read_imm_call(op, size_variant)
  {
      const has_immediate = op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr;
      if(!has_immediate)
      {
          return undefined;
      }

      if(op.imm8)
      {
          return "read_imm8()";
      }
      if(op.imm8s)
      {
          return "read_imm8s()";
      }
      if(op.immaddr)
      {
          // immaddr: depends on address size
          return "read_moffs()";
      }

      console.assert(op.imm1632 || op.imm16 || op.imm32);

      // The size variant only applies to os encodings or odd opcodes; otherwise 8 is used.
      const size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;

      if(op.imm16 || (op.imm1632 && size === 16))
      {
          return "read_imm16()";
      }

      console.assert(op.imm32 || (op.imm1632 && size === 32));
      return "read_imm32s()";
  }
  /**
   * Format a call statement `name(arg1, arg2, ...);`.
   *
   * @param {string} name Function name to call.
   * @param {string[]} [args] Argument expressions; a missing or falsy value means no arguments.
   * @returns {string} The call statement including the trailing semicolon.
   */
  function gen_call(name, args)
  {
      const argument_list = (args || []).join(", ");
      return `${name}(${argument_list});`;
  }
  /**
   * Turn a list of expressions into statements by appending ";" to each.
   *
   * @param {string[]} args Expressions without trailing semicolon.
   * @returns {string[]} A new array with one statement per expression.
   */
  function gen_codegen_call(args)
  {
      const statements = [];
      for(const expression of args)
      {
          statements.push(expression + ";");
      }
      return statements;
  }
  /**
   * Like gen_codegen_call, but for the memory form of a modrm instruction:
   * the statements are preceded by a `modrm_skip(modrm_byte);` call.
   *
   * @param {string[]} args Expressions without trailing semicolon.
   * @returns {string[]} The modrm_skip call followed by one statement per expression.
   */
  function gen_codegen_call_modrm(args)
  {
      const statements = args.map(expression => expression + ";");
      const skip_modrm = gen_call("modrm_skip", ["modrm_byte"]);
      return [skip_modrm, ...statements];
  }
  /**
   * Build an if-else node that separates the memory form of a modrm
   * instruction (modrm_byte < 0xC0) from its register form.
   *
   * @param {string[]} mem_args Expressions emitted in the memory branch, after the modrm skip.
   * @param {string[]} reg_args Expressions emitted in the register branch.
   * @param {Object} [postfixes] Optional `mem_postfix` / `reg_postfix` arrays of
   *     statements appended to the respective branch; each defaults to [].
   * @returns {Object} An "if-else" node with one if-block and an else-block.
   */
  function gen_modrm_mem_reg_split(mem_args, reg_args, postfixes={})
  {
      const { mem_postfix: mem_tail = [], reg_postfix: reg_tail = [] } = postfixes;

      const memory_body = gen_codegen_call_modrm(mem_args).concat(mem_tail);
      const register_body = gen_codegen_call(reg_args).concat(reg_tail);

      const memory_branch = {
          condition: "modrm_byte < 0xC0",
          body: memory_body,
      };

      return {
          type: "if-else",
          if_blocks: [memory_branch],
          else_block: {
              body: register_body,
          },
      };
  }
  /*
   * Current naming scheme:
   * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
   *
   * This function produces the name up to and including the optional
   * fixed_g part; it does not append a _mem/_reg suffix itself.
   *
   * encoding:       entry whose os, opcode and fixed_g fields are used
   * size:           operand size, only included when encoding.os is set
   * prefix_variant: optional prefix byte, formatted via hex() when given
   */
  function make_instruction_name(encoding, size, prefix_variant)
  {
      const opcode_hex = hex(encoding.opcode & 0xFF, 2);
      const is_0f_opcode = (encoding.opcode & 0xFF00) === 0x0F00;

      let name = "instr";
      if(encoding.os)
      {
          name += String(size);
      }
      name += "_";
      if(prefix_variant !== undefined)
      {
          name += hex(prefix_variant, 2);
      }
      if(is_0f_opcode)
      {
          name += "0F";
      }
      name += opcode_hex;
      if(encoding.fixed_g !== undefined)
      {
          name += `_${encoding.fixed_g}`;
      }
      return name;
  }
  /**
   * Decide which branch of a mem/reg split receives the non-faulting flag.
   *
   * For a nonfaulting encoding the flag statement goes to the register branch,
   * except for opcode 0x8D (the lea special case) where it goes to the memory
   * branch instead. Encodings that are not nonfaulting get two empty lists.
   *
   * @param {Object} encoding Entry whose opcode and nonfaulting fields are used.
   * @returns {{mem_postfix: string[], reg_postfix: string[]}}
   */
  function get_nonfaulting_mem_reg_postfix(encoding)
  {
      const postfixes = {
          mem_postfix: [],
          reg_postfix: [],
      };

      if(encoding.nonfaulting)
      {
          const is_lea = encoding.opcode === 0x8D;
          const flagged_branch = is_lea ? postfixes.mem_postfix : postfixes.reg_postfix;
          flagged_branch.push(APPEND_NONFAULTING_FLAG);
      }

      return postfixes;
  }
  /**
   * Collect the flag statements that follow an instruction body.
   *
   * @param {Object} encoding Entry whose block_boundary and
   *     no_next_instruction fields are used.
   * @returns {string[]} A fresh array (callers may push to it) holding the
   *     block-boundary statement first and the no-next-instruction statement second,
   *     each only when the corresponding field is truthy.
   */
  function create_instruction_postfix(encoding)
  {
      const postfix = [];
      if(encoding.block_boundary)
      {
          postfix.push("analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;");
      }
      if(encoding.no_next_instruction)
      {
          postfix.push("analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;");
      }
      return postfix;
  }
  /**
   * Build the if-else node that dispatches on the 66/F2/F3 prefix bits held in
   * the generated `prefixes_` variable. One if-block is emitted per prefix that
   * occurs among the encodings, plus an else-block for the unprefixed form.
   * Every branch holds its own freshly built mem/reg split without postfixes.
   *
   * @param {{has_66: boolean, has_F2: boolean, has_F3: boolean}} prefix_flags
   * @param {string[]} mem_args Expressions for the memory form.
   * @param {string[]} reg_args Expressions for the register form.
   * @returns {Object} An "if-else" node.
   */
  function gen_prefix_dispatch(prefix_flags, mem_args, reg_args)
  {
      const candidates = [
          [prefix_flags.has_66, "prefixes_ & PREFIX_66"],
          [prefix_flags.has_F2, "prefixes_ & PREFIX_F2"],
          [prefix_flags.has_F3, "prefixes_ & PREFIX_F3"],
      ];

      const if_blocks = [];
      for(const [present, condition] of candidates)
      {
          if(present)
          {
              const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
              if_blocks.push({ condition, body, });
          }
      }

      const else_block = {
          body: [gen_modrm_mem_reg_split(mem_args, reg_args, {})],
      };

      return {
          type: "if-else",
          if_blocks,
          else_block,
      };
  }

  /**
   * Generate the body (a list of statement strings and syntax-tree nodes) for
   * one opcode of the analyzer table.
   *
   * @param {Object[]} encodings All table entries sharing this opcode byte;
   *     the first entry decides which of the shapes below is generated.
   * @param {number|undefined} size Operand size variant (16, 32 or undefined).
   * @returns {Array<string|Object>} Body for the opcode's switch case.
   */
  function gen_instruction_body(encodings, size)
  {
      const encoding = encodings[0];

      // A prefix variant is identified by bits 16 and up of the opcode value.
      const uses_prefix = prefix => encodings.some(e => (e.opcode >>> 16) === prefix);
      const prefix_flags = {
          has_66: uses_prefix(0x66),
          has_F2: uses_prefix(0xF2),
          has_F3: uses_prefix(0xF3),
      };
      const has_prefix_variant = prefix_flags.has_66 || prefix_flags.has_F2 || prefix_flags.has_F3;

      if(has_prefix_variant)
      {
          console.assert((encoding.opcode & 0xFF00) === 0x0F00);
      }

      if(encoding.fixed_g !== undefined)
      {
          // instruction with modrm byte where the middle 3 bits encode the instruction
          // group by opcode without prefix plus middle bits of modrm byte
          // (a later entry with the same key replaces an earlier one)
          const cases_by_opcode = Object.create(null);
          for(const case_ of encodings)
          {
              console.assert(typeof case_.fixed_g === "number");
              cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_;
          }
          const cases = Object.values(cases_by_opcode).sort((e1, e2) => e1.fixed_g - e2.fixed_g);

          return [
              "int32_t modrm_byte = read_imm8();",
              {
                  type: "switch",
                  condition: "modrm_byte >> 3 & 7",
                  cases: cases.map(case_ => {
                      const instruction_postfix = create_instruction_postfix(case_);
                      const imm_read = gen_read_imm_call(case_, size);
                      const mem_args = imm_read ? [imm_read] : [];
                      const reg_args = imm_read ? [imm_read] : [];

                      // Prefixed groups dispatch on the prefix and carry no
                      // non-faulting postfix; plain groups use a single split.
                      const body = has_prefix_variant ?
                          [
                              "int32_t prefixes_ = *prefixes;",
                              gen_prefix_dispatch(prefix_flags, mem_args, reg_args),
                          ] :
                          [
                              gen_modrm_mem_reg_split(
                                  mem_args,
                                  reg_args,
                                  get_nonfaulting_mem_reg_postfix(case_)
                              ),
                          ];

                      return {
                          conditions: [case_.fixed_g],
                          body: body.concat(instruction_postfix),
                      };
                  }),
                  default_case: {
                      body: [
                          "assert(false);",
                          "analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;",
                          "analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;",
                      ],
                  }
              },
          ];
      }

      if(has_prefix_variant)
      {
          // instruction with modrm byte (no fixed_g) that has prefix variants
          console.assert(encoding.e);
          console.assert(!encoding.ignore_mod);

          const instruction_postfix = create_instruction_postfix(encoding);
          const imm_read = gen_read_imm_call(encoding, size);
          const mem_args = imm_read ? [imm_read] : [];
          const reg_args = imm_read ? [imm_read] : [];

          return [
              "int32_t modrm_byte = read_imm8();",
              "int32_t prefixes_ = *prefixes;",
              gen_prefix_dispatch(prefix_flags, mem_args, reg_args),
          ].concat(instruction_postfix);
      }

      if(encoding.e)
      {
          // instruction with modrm byte where the middle 3 bits encode a register
          console.assert(encodings.length === 1);

          const instruction_postfix = create_instruction_postfix(encoding);
          const imm_read = gen_read_imm_call(encoding, size);

          if(encoding.ignore_mod)
          {
              console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

              // Has modrm byte, but the 2 mod bits are ignored and both
              // operands are always registers (0f20-0f24)
              if(encoding.nonfaulting)
              {
                  instruction_postfix.push(APPEND_NONFAULTING_FLAG);
              }

              return ["int32_t modrm_byte = read_imm8();"].concat(instruction_postfix);
          }

          const mem_args = imm_read ? [imm_read] : [];
          const reg_args = imm_read ? [imm_read] : [];

          return [
              "int32_t modrm_byte = read_imm8();",
              gen_modrm_mem_reg_split(
                  mem_args,
                  reg_args,
                  get_nonfaulting_mem_reg_postfix(encoding)
              ),
          ].concat(instruction_postfix);
      }

      if(encoding.prefix)
      {
          console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");

          const instruction_postfix = create_instruction_postfix(encoding);
          const instruction_name = make_instruction_name(encoding, size) + "_analyze";
          const imm_read = gen_read_imm_call(encoding, size);
          const args = imm_read ? [imm_read] : [];

          // Prefix calls can add to the return flags, so the generated code
          // returns the call's result; postfix statements are still emitted after it.
          return ["return " + gen_call(instruction_name, args)].concat(instruction_postfix);
      }

      // instruction without modrm byte or prefix
      const instruction_postfix = create_instruction_postfix(encoding);
      const imm_read = gen_read_imm_call(encoding, size);
      const statements = [];

      if(imm_read)
      {
          if(encoding.jump_offset_imm)
          {
              statements.push("int32_t jump_offset = " + imm_read + ";");
              statements.push("analysis.jump_offset = jump_offset;");
              statements.push("analysis.flags |= is_osize_32() ? JIT_INSTR_IMM_JUMP32_FLAG : JIT_INSTR_IMM_JUMP16_FLAG;");
          }
          else
          {
              statements.push(imm_read + ";");
          }
      }

      if(encoding.extra_imm16)
      {
          console.assert(imm_read);
          statements.push(gen_call("read_imm16"));
      }
      else if(encoding.extra_imm8)
      {
          console.assert(imm_read);
          statements.push(gen_call("read_imm8"));
      }

      if(encoding.nonfaulting)
      {
          instruction_postfix.push(APPEND_NONFAULTING_FLAG);
      }

      if(encoding.conditional_jump)
      {
          // The low nibble of a 0x7_ / 0x0F8_ opcode is recorded as the condition index.
          console.assert((encoding.opcode & ~0xF) === 0x70 || (encoding.opcode & ~0xF) === 0x0F80);
          instruction_postfix.push("analysis.condition_index = " + (encoding.opcode & 0xF) + ";");
      }

      return statements.concat(instruction_postfix);
  }
  /**
   * Build the analyzer switch tables from the encoding list and write the ones
   * selected in to_generate via finalize_table.
   *
   * Encodings with an opcode below 0x100 go into the one-byte table; those at or
   * above 0x100 whose bits 8-15 equal 0x0F go into the 0F tables, keyed by
   * their low byte. Other opcodes at or above 0x100 are skipped.
   */
  function gen_table()
  {
      const one_byte_groups = Object.create(null);
      const two_byte_groups = Object.create(null);

      for(const entry of encodings)
      {
          let groups = one_byte_groups;
          let key = entry.opcode;

          if(key >= 0x100)
          {
              if((key & 0xFF00) !== 0x0F00)
              {
                  continue;
              }
              groups = two_byte_groups;
              key &= 0xFF;
          }

          groups[key] = groups[key] || [];
          groups[key].push(entry);
      }

      // Shared skeleton of every generated table: a switch over `opcode`.
      const make_opcode_switch = switch_cases => ({
          type: "switch",
          condition: "opcode",
          cases: switch_cases,
          default_case: {
              body: ["assert(false);"]
          },
      });

      const write_table = (table_name, table_node) => {
          finalize_table(
              OUT_DIR,
              table_name,
              c_ast.print_syntax_tree([table_node]).join("\n") + "\n"
          );
      };

      const cases = [];
      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          const group = one_byte_groups[opcode];
          console.assert(group && group.length);

          const label = `0x${hex(opcode, 2)}`;
          const label_32 = `${label}|0x100`;

          if(group[0].os)
          {
              // Size-dependent opcode: separate cases for the 16 and 32 bit variants.
              cases.push({
                  conditions: [label],
                  body: gen_instruction_body(group, 16),
              });
              cases.push({
                  conditions: [label_32],
                  body: gen_instruction_body(group, 32),
              });
          }
          else
          {
              cases.push({
                  conditions: [label, label_32],
                  body: gen_instruction_body(group, undefined),
              });
          }
      }

      const table = make_opcode_switch(cases);
      if(to_generate.analyzer)
      {
          write_table("analyzer", table);
      }

      const cases0f_16 = [];
      const cases0f_32 = [];
      for(let opcode = 0; opcode < 0x100; opcode++)
      {
          const group = two_byte_groups[opcode];
          console.assert(group && group.length);

          const label = `0x${hex(opcode, 2)}`;

          if(group[0].os)
          {
              cases0f_16.push({
                  conditions: [label],
                  body: gen_instruction_body(group, 16),
              });
              cases0f_32.push({
                  conditions: [label],
                  body: gen_instruction_body(group, 32),
              });
          }
          else
          {
              // Size-independent opcode: the same case object is shared by both tables.
              const shared_case = {
                  conditions: [label],
                  body: gen_instruction_body(group, undefined),
              };
              cases0f_16.push(shared_case);
              cases0f_32.push(shared_case);
          }
      }

      const table0f_16 = make_opcode_switch(cases0f_16);
      const table0f_32 = make_opcode_switch(cases0f_32);

      if(to_generate.analyzer0f_16)
      {
          write_table("analyzer0f_16", table0f_16);
      }
      if(to_generate.analyzer0f_32)
      {
          write_table("analyzer0f_32", table0f_32);
      }
  }