instructions_0f.c 108 KB


  1. #include <assert.h>
  2. #include <math.h>
  3. #include <stdbool.h>
  4. #include <stdint.h>
  5. #include <stdlib.h>
  6. #include "arith.h"
  7. #include "const.h"
  8. #include "cpu.h"
  9. #include "fpu.h"
  10. #include "global_pointers.h"
  11. #include "instructions.h"
  12. #include "instructions_0f.h"
  13. #include "js_imports.h"
  14. #include "log.h"
  15. #include "memory.h"
  16. #include "misc_instr.h"
  17. #include "sse_instr.h"
  18. #pragma clang diagnostic push
  19. #pragma clang diagnostic ignored "-Wunused-parameter"
// Pointer to the APIC-enabled flag. In this file it is written by the wrmsr
// handler and read by the rdmsr handler for IA32_APIC_BASE_MSR.
// NOTE(review): this is a const pointer with no initializer visible here --
// confirm where the address it points to is established before it is dereferenced.
bool* const apic_enabled;
  21. void instr_0F00_0_mem(int32_t addr) {
  22. // sldt
  23. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  24. safe_write16(addr, sreg[LDTR]);
  25. }
  26. void instr_0F00_0_reg(int32_t r) {
  27. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  28. write_reg_osize(r, sreg[LDTR]);
  29. }
  30. void instr_0F00_1_mem(int32_t addr) {
  31. // str
  32. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  33. safe_write16(addr, sreg[TR]);
  34. }
  35. void instr_0F00_1_reg(int32_t r) {
  36. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  37. write_reg_osize(r, sreg[TR]);
  38. }
  39. void instr_0F00_2_mem(int32_t addr) {
  40. // lldt
  41. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  42. if(cpl[0]) trigger_gp(0);
  43. load_ldt(safe_read16(addr));
  44. }
  45. void instr_0F00_2_reg(int32_t r) {
  46. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  47. if(cpl[0]) trigger_gp(0);
  48. load_ldt(read_reg16(r));
  49. }
  50. void instr_0F00_3_mem(int32_t addr) {
  51. // ltr
  52. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  53. if(cpl[0]) trigger_gp(0);
  54. load_tr(safe_read16(addr));
  55. }
  56. void instr_0F00_3_reg(int32_t r) {
  57. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  58. if(cpl[0]) trigger_gp(0);
  59. load_tr(read_reg16(r));
  60. }
  61. void instr_0F00_4_mem(int32_t addr) {
  62. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  63. verr(safe_read16(addr));
  64. }
  65. void instr_0F00_4_reg(int32_t r) {
  66. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  67. verr(read_reg16(r));
  68. }
  69. void instr_0F00_5_mem(int32_t addr) {
  70. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  71. verw(safe_read16(addr));
  72. }
  73. void instr_0F00_5_reg(int32_t r) {
  74. if(!protected_mode[0] || vm86_mode()) trigger_ud();
  75. verw(read_reg16(r));
  76. }
  77. void instr_0F01_0_reg(int32_t r) { trigger_ud(); }
  78. void instr_0F01_0_mem(int32_t addr) {
  79. // sgdt
  80. writable_or_pagefault(addr, 6);
  81. int32_t mask = is_osize_32() ? -1 : 0x00FFFFFF;
  82. safe_write16(addr, gdtr_size[0]);
  83. safe_write32(addr + 2, gdtr_offset[0] & mask);
  84. }
  85. void instr_0F01_1_reg(int32_t r) { trigger_ud(); }
  86. void instr_0F01_1_mem(int32_t addr) {
  87. // sidt
  88. writable_or_pagefault(addr, 6);
  89. int32_t mask = is_osize_32() ? -1 : 0x00FFFFFF;
  90. safe_write16(addr, idtr_size[0]);
  91. safe_write32(addr + 2, idtr_offset[0] & mask);
  92. }
  93. void instr_0F01_2_reg(int32_t r) { trigger_ud(); }
  94. void instr_0F01_2_mem(int32_t addr) {
  95. // lgdt
  96. if(cpl[0]) trigger_gp(0);
  97. int32_t size = safe_read16(addr);
  98. int32_t offset = safe_read32s(addr + 2);
  99. int32_t mask = is_osize_32() ? -1 : 0x00FFFFFF;
  100. gdtr_size[0] = size;
  101. gdtr_offset[0] = offset & mask;
  102. }
  103. void instr_0F01_3_reg(int32_t r) { trigger_ud(); }
  104. void instr_0F01_3_mem(int32_t addr) {
  105. // lidt
  106. if(cpl[0]) trigger_gp(0);
  107. int32_t size = safe_read16(addr);
  108. int32_t offset = safe_read32s(addr + 2);
  109. int32_t mask = is_osize_32() ? -1 : 0x00FFFFFF;
  110. idtr_size[0] = size;
  111. idtr_offset[0] = offset & mask;
  112. }
  113. void instr_0F01_4_reg(int32_t r) {
  114. // smsw
  115. write_reg_osize(r, cr[0]);
  116. }
  117. void instr_0F01_4_mem(int32_t addr) {
  118. safe_write16(addr, cr[0] & 0xFFFF);
  119. }
  120. void lmsw(int32_t new_cr0) {
  121. new_cr0 = (cr[0] & ~0xF) | (new_cr0 & 0xF);
  122. if(protected_mode[0])
  123. {
  124. // lmsw cannot be used to switch back
  125. new_cr0 |= CR0_PE;
  126. }
  127. set_cr0(new_cr0);
  128. }
  129. void instr_0F01_6_reg(int32_t r) {
  130. if(cpl[0]) trigger_gp(0);
  131. lmsw(read_reg16(r));
  132. }
  133. void instr_0F01_6_mem(int32_t addr) {
  134. if(cpl[0]) trigger_gp(0);
  135. lmsw(safe_read16(addr));
  136. }
  137. void instr_0F01_7_reg(int32_t r) { trigger_ud(); }
  138. void instr_0F01_7_mem(int32_t addr) {
  139. // invlpg
  140. if(cpl[0]) trigger_gp(0);
  141. invlpg(addr);
  142. }
// 0F 02 (lar) and 0F 03 (lsl), 16- and 32-bit destination variants.
// All four use the 16-bit read macro; the helper is given that operand
// (`___`, presumably the macro's placeholder for the fetched r/m value --
// macro is defined elsewhere) together with the destination register's
// current value, and its result is written back to the destination register.
DEFINE_MODRM_INSTR_READ16(instr16_0F02, write_reg16(r, lar(___, read_reg16(r))))
DEFINE_MODRM_INSTR_READ16(instr32_0F02, write_reg32(r, lar(___, read_reg32(r))))
DEFINE_MODRM_INSTR_READ16(instr16_0F03, write_reg16(r, lsl(___, read_reg16(r))))
DEFINE_MODRM_INSTR_READ16(instr32_0F03, write_reg32(r, lsl(___, read_reg32(r))))
  147. void instr_0F04() { undefined_instruction(); }
  148. void instr_0F05() { undefined_instruction(); }
  149. void instr_0F06() {
  150. // clts
  151. if(cpl[0])
  152. {
  153. dbg_log("clts #gp");
  154. trigger_gp(0);
  155. }
  156. else
  157. {
  158. //dbg_log("clts");
  159. cr[0] &= ~CR0_TS;
  160. }
  161. }
  162. void instr_0F07() { undefined_instruction(); }
  163. void instr_0F08() {
  164. // invd
  165. todo();
  166. }
  167. void instr_0F09() {
  168. if(cpl[0])
  169. {
  170. dbg_log("wbinvd #gp");
  171. trigger_gp(0);
  172. }
  173. // wbinvd
  174. }
  175. void instr_0F0A() { undefined_instruction(); }
  176. void instr_0F0B() {
  177. // UD2
  178. trigger_ud();
  179. }
  180. void instr_0F0C() { undefined_instruction(); }
  181. void instr_0F0D() {
  182. // nop
  183. todo();
  184. }
  185. void instr_0F0E() { undefined_instruction(); }
  186. void instr_0F0F() { undefined_instruction(); }
  187. void instr_0F10(union reg128 source, int32_t r) {
  188. // movups xmm, xmm/m128
  189. mov_rm_r128(source, r);
  190. }
  191. DEFINE_SSE_SPLIT(instr_0F10, safe_read128s, read_xmm128s)
  192. void instr_F30F10_reg(int32_t r1, int32_t r2) {
  193. // movss xmm, xmm/m32
  194. task_switch_test_mmx();
  195. union reg128 data = read_xmm128s(r1);
  196. union reg128 orig = read_xmm128s(r2);
  197. write_xmm128(r2, data.u32[0], orig.u32[1], orig.u32[2], orig.u32[3]);
  198. }
  199. void instr_F30F10_mem(int32_t addr, int32_t r) {
  200. // movss xmm, xmm/m32
  201. task_switch_test_mmx();
  202. int32_t data = safe_read32s(addr);
  203. write_xmm128(r, data, 0, 0, 0);
  204. }
  205. void instr_660F10(union reg128 source, int32_t r) {
  206. // movupd xmm, xmm/m128
  207. mov_rm_r128(source, r);
  208. }
  209. DEFINE_SSE_SPLIT(instr_660F10, safe_read128s, read_xmm128s)
  210. void instr_F20F10_reg(int32_t r1, int32_t r2) {
  211. // movsd xmm, xmm/m64
  212. task_switch_test_mmx();
  213. union reg128 data = read_xmm128s(r1);
  214. union reg128 orig = read_xmm128s(r2);
  215. write_xmm128(r2, data.u32[0], data.u32[1], orig.u32[2], orig.u32[3]);
  216. }
  217. void instr_F20F10_mem(int32_t addr, int32_t r) {
  218. // movsd xmm, xmm/m64
  219. task_switch_test_mmx();
  220. union reg64 data = safe_read64s(addr);
  221. write_xmm128(r, data.u32[0], data.u32[1], 0, 0);
  222. }
  223. void instr_0F11_reg(int32_t r1, int32_t r2) {
  224. // movups xmm/m128, xmm
  225. mov_r_r128(r1, r2);
  226. }
  227. void instr_0F11_mem(int32_t addr, int32_t r) {
  228. // movups xmm/m128, xmm
  229. mov_r_m128(addr, r);
  230. }
  231. void instr_F30F11_reg(int32_t rm_dest, int32_t reg_src) {
  232. // movss xmm/m32, xmm
  233. task_switch_test_mmx();
  234. union reg128 data = read_xmm128s(reg_src);
  235. union reg128 orig = read_xmm128s(rm_dest);
  236. write_xmm128(rm_dest, data.u32[0], orig.u32[1], orig.u32[2], orig.u32[3]);
  237. }
  238. void instr_F30F11_mem(int32_t addr, int32_t r) {
  239. // movss xmm/m32, xmm
  240. task_switch_test_mmx();
  241. union reg128 data = read_xmm128s(r);
  242. safe_write32(addr, data.u32[0]);
  243. }
  244. void instr_660F11_reg(int32_t r1, int32_t r2) {
  245. // movupd xmm/m128, xmm
  246. mov_r_r128(r1, r2);
  247. }
  248. void instr_660F11_mem(int32_t addr, int32_t r) {
  249. // movupd xmm/m128, xmm
  250. mov_r_m128(addr, r);
  251. }
  252. void instr_F20F11_reg(int32_t r1, int32_t r2) {
  253. // movsd xmm/m64, xmm
  254. task_switch_test_mmx();
  255. union reg128 data = read_xmm128s(r2);
  256. union reg128 orig = read_xmm128s(r1);
  257. write_xmm128(r1, data.u32[0], data.u32[1], orig.u32[2], orig.u32[3]);
  258. }
  259. void instr_F20F11_mem(int32_t addr, int32_t r) {
  260. // movsd xmm/m64, xmm
  261. task_switch_test_mmx();
  262. union reg64 data = read_xmm64s(r);
  263. safe_write64(addr, data.u64[0]);
  264. }
  265. void instr_0F12_mem(int32_t addr, int32_t r) {
  266. // movlps xmm, m64
  267. task_switch_test_mmx();
  268. union reg64 data = safe_read64s(addr);
  269. union reg128 orig = read_xmm128s(r);
  270. write_xmm128(r, data.u32[0], data.u32[1], orig.u32[2], orig.u32[3]);
  271. }
  272. void instr_0F12_reg(int32_t r1, int32_t r2) {
  273. // movhlps xmm, xmm
  274. task_switch_test_mmx();
  275. union reg128 data = read_xmm128s(r1);
  276. union reg128 orig = read_xmm128s(r2);
  277. write_xmm128(r2, data.u32[2], data.u32[3], orig.u32[2], orig.u32[3]);
  278. }
  279. void instr_660F12_reg(int32_t r1, int32_t r) { trigger_ud(); }
  280. void instr_660F12_mem(int32_t addr, int32_t r) {
  281. // movlpd xmm, m64
  282. task_switch_test_mmx();
  283. union reg64 data = safe_read64s(addr);
  284. write_xmm64(r, data);
  285. }
  286. void instr_F20F12_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
  287. void instr_F20F12_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
  288. void instr_F30F12_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
  289. void instr_F30F12_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
  290. void instr_0F13_mem(int32_t addr, int32_t r) {
  291. // movlps m64, xmm
  292. movl_r128_m64(addr, r);
  293. }
  294. void instr_0F13_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
  295. void instr_660F13_reg(int32_t r1, int32_t r) { trigger_ud(); }
  296. void instr_660F13_mem(int32_t addr, int32_t r) {
  297. // movlpd xmm/m64, xmm
  298. movl_r128_m64(addr, r);
  299. }
  300. void instr_0F14(union reg64 source, int32_t r) {
  301. // unpcklps xmm, xmm/m128
  302. // XXX: Aligned access or #gp
  303. task_switch_test_mmx();
  304. union reg64 destination = read_xmm64s(r);
  305. write_xmm128(
  306. r,
  307. destination.u32[0],
  308. source.u32[0],
  309. destination.u32[1],
  310. source.u32[1]
  311. );
  312. }
  313. DEFINE_SSE_SPLIT(instr_0F14, safe_read64s, read_xmm64s)
  314. void instr_660F14(union reg64 source, int32_t r) {
  315. // unpcklpd xmm, xmm/m128
  316. // XXX: Aligned access or #gp
  317. task_switch_test_mmx();
  318. union reg64 destination = read_xmm64s(r);
  319. write_xmm128(
  320. r,
  321. destination.u32[0],
  322. destination.u32[1],
  323. source.u32[0],
  324. source.u32[1]
  325. );
  326. }
  327. DEFINE_SSE_SPLIT(instr_660F14, safe_read64s, read_xmm64s)
  328. void instr_0F15(union reg128 source, int32_t r) {
  329. // unpckhps xmm, xmm/m128
  330. // XXX: Aligned access or #gp
  331. task_switch_test_mmx();
  332. union reg128 destination = read_xmm128s(r);
  333. write_xmm128(
  334. r,
  335. destination.u32[2],
  336. source.u32[2],
  337. destination.u32[3],
  338. source.u32[3]
  339. );
  340. }
  341. DEFINE_SSE_SPLIT(instr_0F15, safe_read128s, read_xmm128s)
  342. void instr_660F15(union reg128 source, int32_t r) {
  343. // unpckhpd xmm, xmm/m128
  344. // XXX: Aligned access or #gp
  345. task_switch_test_mmx();
  346. union reg128 destination = read_xmm128s(r);
  347. write_xmm128(
  348. r,
  349. destination.u32[2],
  350. destination.u32[3],
  351. source.u32[2],
  352. source.u32[3]
  353. );
  354. }
  355. DEFINE_SSE_SPLIT(instr_660F15, safe_read128s, read_xmm128s)
  356. void instr_0F16_mem(int32_t addr, int32_t r) {
  357. // movhps xmm, m64
  358. movh_m64_r128(addr, r);
  359. }
  360. void instr_0F16_reg(int32_t r1, int32_t r2) {
  361. // movlhps xmm, xmm
  362. task_switch_test_mmx();
  363. union reg128 data = read_xmm128s(r1);
  364. union reg128 orig = read_xmm128s(r2);
  365. write_xmm128(r2, orig.u32[0], orig.u32[1], data.u32[0], data.u32[1]);
  366. }
  367. void instr_660F16_mem(int32_t addr, int32_t r) {
  368. // movhpd xmm, m64
  369. movh_m64_r128(addr, r);
  370. }
  371. void instr_660F16_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  372. void instr_0F17_mem(int32_t addr, int32_t r) {
  373. // movhps m64, xmm
  374. movh_r128_m64(addr, r);
  375. }
  376. void instr_0F17_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  377. void instr_660F17_mem(int32_t addr, int32_t r) {
  378. // movhpd m64, xmm
  379. movh_r128_m64(addr, r);
  380. }
  381. void instr_660F17_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  382. void instr_0F18_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  383. void instr_0F18_mem(int32_t addr, int32_t r) {
  384. // prefetch
  385. // nop for us
  386. }
  387. void instr_0F19() { unimplemented_sse(); }
  388. void instr_0F1A() { unimplemented_sse(); }
  389. void instr_0F1B() { unimplemented_sse(); }
  390. void instr_0F1C() { unimplemented_sse(); }
  391. void instr_0F1D() { unimplemented_sse(); }
  392. void instr_0F1E() { unimplemented_sse(); }
  393. void instr_0F1F_reg(int32_t r1, int32_t r2) {
  394. // multi-byte nop
  395. }
  396. void instr_0F1F_mem(int32_t addr, int32_t r) {
  397. // multi-byte nop
  398. }
  399. void instr_0F20(int32_t r, int32_t creg) {
  400. if(cpl[0])
  401. {
  402. trigger_gp(0);
  403. }
  404. switch(creg)
  405. {
  406. case 0:
  407. write_reg32(r, cr[0]);
  408. break;
  409. case 2:
  410. write_reg32(r, cr[2]);
  411. break;
  412. case 3:
  413. write_reg32(r, cr[3]);
  414. break;
  415. case 4:
  416. write_reg32(r, cr[4]);
  417. break;
  418. default:
  419. dbg_log("%d", creg);
  420. todo();
  421. trigger_ud();
  422. }
  423. }
  424. void instr_0F21(int32_t r, int32_t dreg_index) {
  425. if(cpl[0])
  426. {
  427. trigger_gp(0);
  428. }
  429. if(dreg_index == 4 || dreg_index == 5)
  430. {
  431. if(cr[4] & CR4_DE)
  432. {
  433. dbg_log("#ud mov dreg 4/5 with cr4.DE set");
  434. trigger_ud();
  435. }
  436. else
  437. {
  438. // DR4 and DR5 refer to DR6 and DR7 respectively
  439. dreg_index += 2;
  440. }
  441. }
  442. write_reg32(r, dreg[dreg_index]);
  443. //dbg_log("read dr%d: %x", dreg_index, dreg[dreg_index]);
  444. }
  445. void instr_0F22(int32_t r, int32_t creg) {
  446. if(cpl[0])
  447. {
  448. trigger_gp(0);
  449. }
  450. int32_t data = read_reg32(r);
  451. // mov cr, addr
  452. switch(creg)
  453. {
  454. case 0:
  455. //dbg_log("cr0 <- %x", data);
  456. set_cr0(data);
  457. break;
  458. case 2:
  459. dbg_log("cr2 <- %x", data);
  460. cr[2] = data;
  461. break;
  462. case 3:
  463. //dbg_log("cr3 <- %x", data);
  464. data &= ~0b111111100111;
  465. dbg_assert_message((data & 0xFFF) == 0, "TODO");
  466. cr[3] = data;
  467. clear_tlb();
  468. //dump_page_directory();
  469. break;
  470. case 4:
  471. dbg_log("cr4 <- %d", cr[4]);
  472. if(data & (1 << 11 | 1 << 12 | 1 << 15 | 1 << 16 | 1 << 19 | 0xFFC00000))
  473. {
  474. trigger_gp(0);
  475. }
  476. if((cr[4] ^ data) & CR4_PGE)
  477. {
  478. if(data & CR4_PGE)
  479. {
  480. // The PGE bit has been enabled. The global TLB is
  481. // still empty, so we only have to copy it over
  482. clear_tlb();
  483. }
  484. else
  485. {
  486. // Clear the global TLB
  487. full_clear_tlb();
  488. }
  489. }
  490. cr[4] = data;
  491. page_size_extensions[0] = (cr[4] & CR4_PSE) ? PSE_ENABLED : 0;
  492. if(cr[4] & CR4_PAE)
  493. {
  494. //throw debug.unimpl("PAE");
  495. assert(false);
  496. }
  497. break;
  498. default:
  499. dbg_log("%d", creg);
  500. todo();
  501. trigger_ud();
  502. }
  503. }
  504. void instr_0F23(int32_t r, int32_t dreg_index) {
  505. if(cpl[0])
  506. {
  507. trigger_gp(0);
  508. }
  509. if(dreg_index == 4 || dreg_index == 5)
  510. {
  511. if(cr[4] & CR4_DE)
  512. {
  513. dbg_log("#ud mov dreg 4/5 with cr4.DE set");
  514. trigger_ud();
  515. }
  516. else
  517. {
  518. // DR4 and DR5 refer to DR6 and DR7 respectively
  519. dreg_index += 2;
  520. }
  521. }
  522. dreg[dreg_index] = read_reg32(r);
  523. //dbg_log("write dr%d: %x", dreg_index, dreg[dreg_index]);
  524. }
  525. void instr_0F24() { undefined_instruction(); }
  526. void instr_0F25() { undefined_instruction(); }
  527. void instr_0F26() { undefined_instruction(); }
  528. void instr_0F27() { undefined_instruction(); }
  529. void instr_0F28(union reg128 source, int32_t r) {
  530. // movaps xmm, xmm/m128
  531. // XXX: Aligned read or #gp
  532. mov_rm_r128(source, r);
  533. }
  534. DEFINE_SSE_SPLIT(instr_0F28, safe_read128s, read_xmm128s)
  535. void instr_660F28(union reg128 source, int32_t r) {
  536. // movapd xmm, xmm/m128
  537. // XXX: Aligned read or #gp
  538. // Note: Same as movdqa (660F6F)
  539. mov_rm_r128(source, r);
  540. }
  541. DEFINE_SSE_SPLIT(instr_660F28, safe_read128s, read_xmm128s)
  542. void instr_0F29_mem(int32_t addr, int32_t r) {
  543. // movaps m128, xmm
  544. task_switch_test_mmx();
  545. union reg128 data = read_xmm128s(r);
  546. // XXX: Aligned write or #gp
  547. safe_write128(addr, data);
  548. }
  549. void instr_0F29_reg(int32_t r1, int32_t r2) {
  550. // movaps xmm, xmm
  551. mov_r_r128(r1, r2);
  552. }
  553. void instr_660F29_mem(int32_t addr, int32_t r) {
  554. // movapd m128, xmm
  555. task_switch_test_mmx();
  556. union reg128 data = read_xmm128s(r);
  557. // XXX: Aligned write or #gp
  558. safe_write128(addr, data);
  559. }
  560. void instr_660F29_reg(int32_t r1, int32_t r2) {
  561. // movapd xmm, xmm
  562. mov_r_r128(r1, r2);
  563. }
  564. void instr_0F2A() { unimplemented_sse(); }
  565. void instr_0F2B_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  566. void instr_0F2B_mem(int32_t addr, int32_t r) {
  567. // movntps m128, xmm
  568. // XXX: Aligned write or #gp
  569. mov_r_m128(addr, r);
  570. }
  571. void instr_660F2B_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  572. void instr_660F2B_mem(int32_t addr, int32_t r) {
  573. // movntpd m128, xmm
  574. // XXX: Aligned write or #gp
  575. mov_r_m128(addr, r);
  576. }
  577. void instr_0F2C_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
  578. void instr_0F2C_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
  579. void instr_660F2C_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
  580. void instr_660F2C_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
  581. void instr_F20F2C(union reg64 source, int32_t r) {
  582. // cvttsd2si r32, xmm/m64
  583. // emscripten bug causes this ported instruction to throw "integer result unpresentable"
  584. // https://github.com/kripken/emscripten/issues/5433
  585. task_switch_test_mmx();
  586. #if 0
  587. union reg64 source = read_xmm_mem64s();
  588. double f = source.f64[0];
  589. if(f <= 0x7FFFFFFF && f >= -0x80000000)
  590. {
  591. int32_t si = (int32_t) f;
  592. write_g32(si);
  593. }
  594. else
  595. {
  596. write_g32(0x80000000);
  597. }
  598. #else
  599. write_reg32(r, convert_f64_to_i32(source.f64[0]));
  600. #endif
  601. }
  602. DEFINE_SSE_SPLIT(instr_F20F2C, safe_read64s, read_xmm64s)
  603. void instr_F30F2C_mem(int32_t addr, int32_t r) { unimplemented_sse(); }
  604. void instr_F30F2C_reg(int32_t r1, int32_t r2) { unimplemented_sse(); }
  605. void instr_0F2D() { unimplemented_sse(); }
  606. void instr_0F2E() { unimplemented_sse(); }
  607. void instr_0F2F() { unimplemented_sse(); }
  608. // wrmsr
  609. void instr_0F30() {
  610. // wrmsr - write maschine specific register
  611. if(cpl[0])
  612. {
  613. trigger_gp(0);
  614. }
  615. int32_t index = reg32s[ECX];
  616. int32_t low = reg32s[EAX];
  617. int32_t high = reg32s[EDX];
  618. if(index != IA32_SYSENTER_ESP)
  619. {
  620. dbg_log("wrmsr ecx=%x data=%x:%x", index, high, low);
  621. }
  622. switch(index)
  623. {
  624. case IA32_SYSENTER_CS:
  625. sysenter_cs[0] = low & 0xFFFF;
  626. break;
  627. case IA32_SYSENTER_EIP:
  628. sysenter_eip[0] = low;
  629. break;
  630. case IA32_SYSENTER_ESP:
  631. sysenter_esp[0] = low;
  632. break;
  633. case IA32_APIC_BASE_MSR:
  634. {
  635. dbg_assert_message(high == 0, "Changing APIC address (high 32 bits) not supported");
  636. int32_t address = low & ~(IA32_APIC_BASE_BSP | IA32_APIC_BASE_EXTD | IA32_APIC_BASE_EN);
  637. dbg_assert_message(address == APIC_ADDRESS, "Changing APIC address not supported");
  638. dbg_assert_message((low & IA32_APIC_BASE_EXTD) == 0, "x2apic not supported");
  639. *apic_enabled = (low & IA32_APIC_BASE_EN) == IA32_APIC_BASE_EN;
  640. }
  641. break;
  642. case IA32_TIME_STAMP_COUNTER:
  643. set_tsc(low, high);
  644. break;
  645. case IA32_BIOS_SIGN_ID:
  646. break;
  647. case MSR_MISC_FEATURE_ENABLES:
  648. // Linux 4, see: https://patchwork.kernel.org/patch/9528279/
  649. break;
  650. case IA32_MISC_ENABLE: // Enable Misc. Processor Features
  651. break;
  652. case IA32_MCG_CAP:
  653. // netbsd
  654. break;
  655. case IA32_KERNEL_GS_BASE:
  656. // Only used in 64 bit mode (by SWAPGS), but set by kvm-unit-test
  657. dbg_log("GS Base written");
  658. break;
  659. default:
  660. dbg_log("Unknown msr: %x", index);
  661. assert(false);
  662. }
  663. }
  664. void instr_0F31() {
  665. // rdtsc - read timestamp counter
  666. if(!cpl[0] || !(cr[4] & CR4_TSD))
  667. {
  668. uint64_t tsc = read_tsc();
  669. reg32s[EAX] = tsc;
  670. reg32s[EDX] = tsc >> 32;
  671. //dbg_log("rdtsc edx:eax=%x:%x", reg32s[EDX], reg32s[EAX]);
  672. }
  673. else
  674. {
  675. trigger_gp(0);
  676. }
  677. }
  678. void instr_0F32() {
  679. // rdmsr - read maschine specific register
  680. if(cpl[0])
  681. {
  682. trigger_gp(0);
  683. }
  684. int32_t index = reg32s[ECX];
  685. dbg_log("rdmsr ecx=%x", index);
  686. int32_t low = 0;
  687. int32_t high = 0;
  688. switch(index)
  689. {
  690. case IA32_SYSENTER_CS:
  691. low = sysenter_cs[0];
  692. break;
  693. case IA32_SYSENTER_EIP:
  694. low = sysenter_eip[0];
  695. break;
  696. case IA32_SYSENTER_ESP:
  697. low = sysenter_esp[0];
  698. break;
  699. case IA32_TIME_STAMP_COUNTER:
  700. {
  701. uint64_t tsc = read_tsc();
  702. low = tsc;
  703. high = tsc >> 32;
  704. }
  705. break;
  706. case IA32_PLATFORM_ID:
  707. break;
  708. case IA32_APIC_BASE_MSR:
  709. if(ENABLE_ACPI)
  710. {
  711. low = APIC_ADDRESS;
  712. if(*apic_enabled)
  713. {
  714. low |= IA32_APIC_BASE_EN;
  715. }
  716. }
  717. break;
  718. case IA32_BIOS_SIGN_ID:
  719. break;
  720. case MSR_PLATFORM_INFO:
  721. low = 1 << 8;
  722. break;
  723. case MSR_MISC_FEATURE_ENABLES:
  724. break;
  725. case IA32_MISC_ENABLE: // Enable Misc. Processor Features
  726. low = 1 << 0; // fast string
  727. break;
  728. case IA32_RTIT_CTL:
  729. // linux4
  730. break;
  731. case MSR_SMI_COUNT:
  732. break;
  733. case IA32_MCG_CAP:
  734. // netbsd
  735. break;
  736. case MSR_PKG_C2_RESIDENCY:
  737. break;
  738. default:
  739. dbg_log("Unknown msr: %x", index);
  740. assert(false);
  741. }
  742. reg32s[EAX] = low;
  743. reg32s[EDX] = high;
  744. }
  745. void instr_0F33() {
  746. // rdpmc
  747. todo();
  748. }
  749. void instr_0F34() {
  750. // sysenter
  751. int32_t seg = sysenter_cs[0] & 0xFFFC;
  752. if(!protected_mode[0] || seg == 0)
  753. {
  754. trigger_gp(0);
  755. }
  756. if(CPU_LOG_VERBOSE)
  757. {
  758. //dbg_log("sysenter cs:eip=" + h(seg , 4) + ":" + h(sysenter_eip[0], 8) +
  759. // " ss:esp=" + h(seg + 8, 4) + ":" + h(sysenter_esp[0], 8));
  760. }
  761. flags[0] &= ~FLAG_VM & ~FLAG_INTERRUPT;
  762. instruction_pointer[0] = sysenter_eip[0];
  763. reg32s[ESP] = sysenter_esp[0];
  764. sreg[CS] = seg;
  765. segment_is_null[CS] = 0;
  766. segment_limits[CS] = -1;
  767. segment_offsets[CS] = 0;
  768. update_cs_size(true);
  769. cpl[0] = 0;
  770. cpl_changed();
  771. sreg[SS] = seg + 8;
  772. segment_is_null[SS] = 0;
  773. segment_limits[SS] = -1;
  774. segment_offsets[SS] = 0;
  775. stack_size_32[0] = true;
  776. }
  777. void instr_0F35() {
  778. // sysexit
  779. int32_t seg = sysenter_cs[0] & 0xFFFC;
  780. if(!protected_mode[0] || cpl[0] || seg == 0)
  781. {
  782. trigger_gp(0);
  783. }
  784. if(CPU_LOG_VERBOSE)
  785. {
  786. //dbg_log("sysexit cs:eip=" + h(seg + 16, 4) + ":" + h(reg32s[EDX], 8) +
  787. // " ss:esp=" + h(seg + 24, 4) + ":" + h(reg32s[ECX], 8));
  788. }
  789. instruction_pointer[0] = reg32s[EDX];
  790. reg32s[ESP] = reg32s[ECX];
  791. sreg[CS] = seg + 16 | 3;
  792. segment_is_null[CS] = 0;
  793. segment_limits[CS] = -1;
  794. segment_offsets[CS] = 0;
  795. update_cs_size(true);
  796. cpl[0] = 3;
  797. cpl_changed();
  798. sreg[SS] = seg + 24 | 3;
  799. segment_is_null[SS] = 0;
  800. segment_limits[SS] = -1;
  801. segment_offsets[SS] = 0;
  802. stack_size_32[0] = true;
  803. }
  804. void instr_0F36() { undefined_instruction(); }
  805. void instr_0F37() {
  806. // getsec
  807. todo();
  808. }
  809. // sse3+
  810. void instr_0F38() { unimplemented_sse(); }
  811. void instr_0F39() { unimplemented_sse(); }
  812. void instr_0F3A() { unimplemented_sse(); }
  813. void instr_0F3B() { unimplemented_sse(); }
  814. void instr_0F3C() { unimplemented_sse(); }
  815. void instr_0F3D() { unimplemented_sse(); }
  816. void instr_0F3E() { unimplemented_sse(); }
  817. void instr_0F3F() { unimplemented_sse(); }
// cmov: conditional moves, opcodes 0F 40 .. 0F 4F.
// Every opcode is defined twice, once through the 16-bit read macro calling
// cmovcc16 and once through the 32-bit read macro calling cmovcc32.
// Opcodes come in pairs: the even opcode passes a test_*() condition and the
// following odd opcode passes its negation.
// Conditions in order: o, b, z, be, s, p, l, le.
DEFINE_MODRM_INSTR_READ16(instr16_0F40, cmovcc16( test_o(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F40, cmovcc32( test_o(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F41, cmovcc16(!test_o(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F41, cmovcc32(!test_o(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F42, cmovcc16( test_b(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F42, cmovcc32( test_b(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F43, cmovcc16(!test_b(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F43, cmovcc32(!test_b(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F44, cmovcc16( test_z(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F44, cmovcc32( test_z(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F45, cmovcc16(!test_z(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F45, cmovcc32(!test_z(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F46, cmovcc16( test_be(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F46, cmovcc32( test_be(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F47, cmovcc16(!test_be(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F47, cmovcc32(!test_be(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F48, cmovcc16( test_s(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F48, cmovcc32( test_s(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F49, cmovcc16(!test_s(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F49, cmovcc32(!test_s(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F4A, cmovcc16( test_p(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F4A, cmovcc32( test_p(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F4B, cmovcc16(!test_p(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F4B, cmovcc32(!test_p(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F4C, cmovcc16( test_l(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F4C, cmovcc32( test_l(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F4D, cmovcc16(!test_l(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F4D, cmovcc32(!test_l(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F4E, cmovcc16( test_le(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F4E, cmovcc32( test_le(), ___, r))
DEFINE_MODRM_INSTR_READ16(instr16_0F4F, cmovcc16(!test_le(), ___, r))
DEFINE_MODRM_INSTR_READ32(instr32_0F4F, cmovcc32(!test_le(), ___, r))
  851. void instr_0F50_reg(int32_t r1, int32_t r2) {
  852. // movmskps r, xmm
  853. task_switch_test_mmx();
  854. union reg128 source = read_xmm128s(r1);
  855. int32_t data = source.u32[0] >> 31 | (source.u32[1] >> 31) << 1 |
  856. (source.u32[2] >> 31) << 2 | (source.u32[3] >> 31) << 3;
  857. write_reg32(r2, data);
  858. }
  859. void instr_0F50_mem(int32_t addr, int32_t r1) { trigger_ud(); }
  860. void instr_660F50_reg(int32_t r1, int32_t r2) {
  861. // movmskpd r, xmm
  862. task_switch_test_mmx();
  863. union reg128 source = read_xmm128s(r1);
  864. int32_t data = (source.u32[1] >> 31) | (source.u32[3] >> 31) << 1;
  865. write_reg32(r2, data);
  866. }
  867. void instr_660F50_mem(int32_t addr, int32_t r1) { trigger_ud(); }
  868. void instr_0F51() { unimplemented_sse(); }
  869. void instr_0F52() { unimplemented_sse(); }
  870. void instr_0F53() { unimplemented_sse(); }
  871. void instr_0F54(union reg128 source, int32_t r) {
  872. // andps xmm, xmm/mem128
  873. // XXX: Aligned access or #gp
  874. pand_r128(source, r);
  875. }
  876. DEFINE_SSE_SPLIT(instr_0F54, safe_read128s, read_xmm128s)
  877. void instr_660F54(union reg128 source, int32_t r) {
  878. // andpd xmm, xmm/mem128
  879. // XXX: Aligned access or #gp
  880. pand_r128(source, r);
  881. }
  882. DEFINE_SSE_SPLIT(instr_660F54, safe_read128s, read_xmm128s)
  883. void instr_0F55(union reg128 source, int32_t r) {
  884. // andnps xmm, xmm/mem128
  885. // XXX: Aligned access or #gp
  886. pandn_r128(source, r);
  887. }
  888. DEFINE_SSE_SPLIT(instr_0F55, safe_read128s, read_xmm128s)
  889. void instr_660F55(union reg128 source, int32_t r) {
  890. // andnpd xmm, xmm/mem128
  891. // XXX: Aligned access or #gp
  892. pandn_r128(source, r);
  893. }
  894. DEFINE_SSE_SPLIT(instr_660F55, safe_read128s, read_xmm128s)
  895. void instr_0F56(union reg128 source, int32_t r) {
  896. // orps xmm, xmm/mem128
  897. // XXX: Aligned access or #gp
  898. por_r128(source, r);
  899. }
  900. DEFINE_SSE_SPLIT(instr_0F56, safe_read128s, read_xmm128s)
  901. void instr_660F56(union reg128 source, int32_t r) {
  902. // orpd xmm, xmm/mem128
  903. // XXX: Aligned access or #gp
  904. por_r128(source, r);
  905. }
  906. DEFINE_SSE_SPLIT(instr_660F56, safe_read128s, read_xmm128s)
  907. void instr_0F57(union reg128 source, int32_t r) {
  908. // xorps xmm, xmm/mem128
  909. // XXX: Aligned access or #gp
  910. pxor_r128(source, r);
  911. }
  912. DEFINE_SSE_SPLIT(instr_0F57, safe_read128s, read_xmm128s)
  913. void instr_660F57(union reg128 source, int32_t r) {
  914. // xorpd xmm, xmm/mem128
  915. // XXX: Aligned access or #gp
  916. pxor_r128(source, r);
  917. }
  918. DEFINE_SSE_SPLIT(instr_660F57, safe_read128s, read_xmm128s)
  919. void instr_0F58() { unimplemented_sse(); }
  920. void instr_0F59() { unimplemented_sse(); }
  921. void instr_0F5A() { unimplemented_sse(); }
  922. void instr_0F5B() { unimplemented_sse(); }
  923. void instr_0F5C() { unimplemented_sse(); }
  924. void instr_0F5D() { unimplemented_sse(); }
  925. void instr_0F5E() { unimplemented_sse(); }
  926. void instr_0F5F() { unimplemented_sse(); }
  927. void instr_0F60(int32_t source, int32_t r) {
  928. // punpcklbw mm, mm/m32
  929. task_switch_test_mmx();
  930. union reg64 destination = read_mmx64s(r);
  931. int32_t byte0 = destination.u8[0];
  932. int32_t byte1 = source & 0xFF;
  933. int32_t byte2 = destination.u8[1];
  934. int32_t byte3 = (source >> 8) & 0xFF;
  935. int32_t byte4 = destination.u8[2];
  936. int32_t byte5 = (source >> 16) & 0xFF;
  937. int32_t byte6 = destination.u8[3];
  938. int32_t byte7 = source >> 24;
  939. int32_t low = byte0 | byte1 << 8 | byte2 << 16 | byte3 << 24;
  940. int32_t high = byte4 | byte5 << 8 | byte6 << 16 | byte7 << 24;
  941. write_mmx64(r, low, high);
  942. }
  943. DEFINE_SSE_SPLIT(instr_0F60, safe_read32s, read_mmx32s)
  944. void instr_660F60(union reg64 source, int32_t r) {
  945. // punpcklbw xmm, xmm/m128
  946. // XXX: Aligned access or #gp
  947. task_switch_test_mmx();
  948. union reg64 destination = read_xmm64s(r);
  949. write_xmm128(
  950. r,
  951. destination.u8[0] | source.u8[0] << 8 | destination.u8[1] << 16 | source.u8[1] << 24,
  952. destination.u8[2] | source.u8[2] << 8 | destination.u8[3] << 16 | source.u8[3] << 24,
  953. destination.u8[4] | source.u8[4] << 8 | destination.u8[5] << 16 | source.u8[5] << 24,
  954. destination.u8[6] | source.u8[6] << 8 | destination.u8[7] << 16 | source.u8[7] << 24
  955. );
  956. }
  957. DEFINE_SSE_SPLIT(instr_660F60, safe_read64s, read_xmm64s)
  958. void instr_0F61(int32_t source, int32_t r) {
  959. // punpcklwd mm, mm/m32
  960. task_switch_test_mmx();
  961. union reg64 destination = read_mmx64s(r);
  962. int32_t word0 = destination.u16[0];
  963. int32_t word1 = source & 0xFFFF;
  964. int32_t word2 = destination.u16[1];
  965. int32_t word3 = source >> 16;
  966. int32_t low = word0 | word1 << 16;
  967. int32_t high = word2 | word3 << 16;
  968. write_mmx64(r, low, high);
  969. }
  970. DEFINE_SSE_SPLIT(instr_0F61, safe_read32s, read_mmx32s)
  971. void instr_660F61(union reg64 source, int32_t r) {
  972. // punpcklwd xmm, xmm/m128
  973. // XXX: Aligned access or #gp
  974. task_switch_test_mmx();
  975. union reg64 destination = read_xmm64s(r);
  976. write_xmm128(
  977. r,
  978. destination.u16[0] | source.u16[0] << 16,
  979. destination.u16[1] | source.u16[1] << 16,
  980. destination.u16[2] | source.u16[2] << 16,
  981. destination.u16[3] | source.u16[3] << 16
  982. );
  983. }
  984. DEFINE_SSE_SPLIT(instr_660F61, safe_read64s, read_xmm64s)
  985. void instr_0F62(int32_t source, int32_t r) {
  986. // punpckldq mm, mm/m32
  987. task_switch_test_mmx();
  988. union reg64 destination = read_mmx64s(r);
  989. write_mmx64(r, destination.u32[0], source);
  990. }
  991. DEFINE_SSE_SPLIT(instr_0F62, safe_read32s, read_mmx32s)
  992. void instr_660F62(union reg128 source, int32_t r) {
  993. // punpckldq xmm, xmm/m128
  994. // XXX: Aligned access or #gp
  995. task_switch_test_mmx();
  996. union reg128 destination = read_xmm128s(r);
  997. write_xmm128(
  998. r,
  999. destination.u32[0],
  1000. source.u32[0],
  1001. destination.u32[1],
  1002. source.u32[1]
  1003. );
  1004. }
  1005. DEFINE_SSE_SPLIT(instr_660F62, safe_read128s, read_xmm128s)
  1006. void instr_0F63(union reg64 source, int32_t r) {
  1007. // packsswb mm, mm/m64
  1008. task_switch_test_mmx();
  1009. union reg64 destination = read_mmx64s(r);
  1010. int32_t low = saturate_sw_to_sb(destination.u16[0]) |
  1011. saturate_sw_to_sb(destination.u16[1]) << 8 |
  1012. saturate_sw_to_sb(destination.u16[2]) << 16 |
  1013. saturate_sw_to_sb(destination.u16[3]) << 24;
  1014. int32_t high = saturate_sw_to_sb(source.u16[0]) |
  1015. saturate_sw_to_sb(source.u16[1]) << 8 |
  1016. saturate_sw_to_sb(source.u16[2]) << 16 |
  1017. saturate_sw_to_sb(source.u16[3]) << 24;
  1018. write_mmx64(r, low, high);
  1019. }
  1020. DEFINE_SSE_SPLIT(instr_0F63, safe_read64s, read_mmx64s)
  1021. void instr_660F63(union reg128 source, int32_t r) {
  1022. // packsswb xmm, xmm/m128
  1023. // XXX: Aligned access or #gp
  1024. task_switch_test_mmx();
  1025. union reg128 destination = read_xmm128s(r);
  1026. int32_t dword0 = saturate_sw_to_sb(destination.u16[0]) |
  1027. saturate_sw_to_sb(destination.u16[1]) << 8 |
  1028. saturate_sw_to_sb(destination.u16[2]) << 16 |
  1029. saturate_sw_to_sb(destination.u16[3]) << 24;
  1030. int32_t dword1 = saturate_sw_to_sb(destination.u16[4]) |
  1031. saturate_sw_to_sb(destination.u16[5]) << 8 |
  1032. saturate_sw_to_sb(destination.u16[6]) << 16 |
  1033. saturate_sw_to_sb(destination.u16[7]) << 24;
  1034. int32_t dword2 = saturate_sw_to_sb(source.u16[0]) |
  1035. saturate_sw_to_sb(source.u16[1]) << 8 |
  1036. saturate_sw_to_sb(source.u16[2]) << 16 |
  1037. saturate_sw_to_sb(source.u16[3]) << 24;
  1038. int32_t dword3 = saturate_sw_to_sb(source.u16[4]) |
  1039. saturate_sw_to_sb(source.u16[5]) << 8 |
  1040. saturate_sw_to_sb(source.u16[6]) << 16 |
  1041. saturate_sw_to_sb(source.u16[7]) << 24;
  1042. write_xmm128(r, dword0, dword1, dword2, dword3);
  1043. }
  1044. DEFINE_SSE_SPLIT(instr_660F63, safe_read128s, read_xmm128s)
  1045. void instr_0F64(union reg64 source, int32_t r) {
  1046. // pcmpgtb mm, mm/m64
  1047. task_switch_test_mmx();
  1048. union reg64 destination = read_mmx64s(r);
  1049. union reg64 result = { { 0 } };
  1050. for(uint32_t i = 0; i < 8; i++)
  1051. {
  1052. result.u8[i] = destination.i8[i] > source.i8[i] ? 0xFF : 0;
  1053. }
  1054. write_mmx_reg64(r, result);
  1055. }
  1056. DEFINE_SSE_SPLIT(instr_0F64, safe_read64s, read_mmx64s)
  1057. void instr_660F64(union reg128 source, int32_t r) {
  1058. // pcmpgtb xmm, xmm/m128
  1059. // XXX: Aligned access or #gp
  1060. task_switch_test_mmx();
  1061. union reg128 destination = read_xmm128s(r);
  1062. union reg128 result = { { 0 } };
  1063. for(int32_t i = 0; i < 16; i++)
  1064. {
  1065. result.i8[i] = destination.i8[i] > source.i8[i] ? 0xFF : 0;
  1066. }
  1067. write_xmm_reg128(r, result);
  1068. }
  1069. DEFINE_SSE_SPLIT(instr_660F64, safe_read128s, read_xmm128s)
  1070. void instr_0F65(union reg64 source, int32_t r) {
  1071. // pcmpgtw mm, mm/m64
  1072. task_switch_test_mmx();
  1073. union reg64 destination = read_mmx64s(r);
  1074. int32_t word0 = destination.i16[0] > source.i16[0] ? 0xFFFF : 0;
  1075. int32_t word1 = destination.i16[1] > source.i16[1] ? 0xFFFF : 0;
  1076. int32_t word2 = destination.i16[2] > source.i16[2] ? 0xFFFF : 0;
  1077. int32_t word3 = destination.i16[3] > source.i16[3] ? 0xFFFF : 0;
  1078. int32_t low = word0 | word1 << 16;
  1079. int32_t high = word2 | word3 << 16;
  1080. write_mmx64(r, low, high);
  1081. }
  1082. DEFINE_SSE_SPLIT(instr_0F65, safe_read64s, read_mmx64s)
  1083. void instr_660F65(union reg128 source, int32_t r) {
  1084. // pcmpgtw xmm, xmm/m128
  1085. // XXX: Aligned access or #gp
  1086. task_switch_test_mmx();
  1087. union reg128 destination = read_xmm128s(r);
  1088. union reg128 result = { { 0 } };
  1089. for(int32_t i = 0; i < 8; i++)
  1090. {
  1091. result.u16[i] = destination.i16[i] > source.i16[i] ? 0xFFFF : 0;
  1092. }
  1093. write_xmm_reg128(r, result);
  1094. }
  1095. DEFINE_SSE_SPLIT(instr_660F65, safe_read128s, read_xmm128s)
  1096. void instr_0F66(union reg64 source, int32_t r) {
  1097. // pcmpgtd mm, mm/m64
  1098. task_switch_test_mmx();
  1099. union reg64 destination = read_mmx64s(r);
  1100. int32_t low = destination.i32[0] > source.i32[0] ? -1 : 0;
  1101. int32_t high = destination.i32[1] > source.i32[1] ? -1 : 0;
  1102. write_mmx64(r, low, high);
  1103. }
  1104. DEFINE_SSE_SPLIT(instr_0F66, safe_read64s, read_mmx64s)
  1105. void instr_660F66(union reg128 source, int32_t r) {
  1106. // pcmpgtd xmm, xmm/m128
  1107. // XXX: Aligned access or #gp
  1108. task_switch_test_mmx();
  1109. union reg128 destination = read_xmm128s(r);
  1110. write_xmm128(
  1111. r,
  1112. destination.i32[0] > source.i32[0] ? -1 : 0,
  1113. destination.i32[1] > source.i32[1] ? -1 : 0,
  1114. destination.i32[2] > source.i32[2] ? -1 : 0,
  1115. destination.i32[3] > source.i32[3] ? -1 : 0
  1116. );
  1117. }
  1118. DEFINE_SSE_SPLIT(instr_660F66, safe_read128s, read_xmm128s)
  1119. void instr_0F67(union reg64 source, int32_t r) {
  1120. // packuswb mm, mm/m64
  1121. task_switch_test_mmx();
  1122. union reg64 destination = read_mmx64s(r);
  1123. uint32_t low = saturate_sw_to_ub(destination.u16[0]) |
  1124. saturate_sw_to_ub(destination.u16[1]) << 8 |
  1125. saturate_sw_to_ub(destination.u16[2]) << 16 |
  1126. saturate_sw_to_ub(destination.u16[3]) << 24;
  1127. uint32_t high = saturate_sw_to_ub(source.u16[0]) |
  1128. saturate_sw_to_ub(source.u16[1]) << 8 |
  1129. saturate_sw_to_ub(source.u16[2]) << 16 |
  1130. saturate_sw_to_ub(source.u16[3]) << 24;
  1131. write_mmx64(r, low, high);
  1132. }
  1133. DEFINE_SSE_SPLIT(instr_0F67, safe_read64s, read_mmx64s)
  1134. void instr_660F67(union reg128 source, int32_t r) {
  1135. // packuswb xmm, xmm/m128
  1136. // XXX: Aligned access or #gp
  1137. task_switch_test_mmx();
  1138. union reg128 destination = read_xmm128s(r);
  1139. union reg128 result;
  1140. for(int32_t i = 0; i < 8; i++)
  1141. {
  1142. result.u8[i] = saturate_sw_to_ub(destination.u16[i]);
  1143. result.u8[i | 8] = saturate_sw_to_ub(source.u16[i]);
  1144. }
  1145. write_xmm_reg128(r, result);
  1146. }
  1147. DEFINE_SSE_SPLIT(instr_660F67, safe_read128s, read_xmm128s)
  1148. void instr_0F68(union reg64 source, int32_t r) {
  1149. // punpckhbw mm, mm/m64
  1150. task_switch_test_mmx();
  1151. union reg64 destination = read_mmx64s(r);
  1152. int32_t byte0 = destination.u8[4];
  1153. int32_t byte1 = source.u8[4];
  1154. int32_t byte2 = destination.u8[5];
  1155. int32_t byte3 = source.u8[5];
  1156. int32_t byte4 = destination.u8[6];
  1157. int32_t byte5 = source.u8[6];
  1158. int32_t byte6 = destination.u8[7];
  1159. int32_t byte7 = source.u8[7];
  1160. int32_t low = byte0 | byte1 << 8 | byte2 << 16 | byte3 << 24;
  1161. int32_t high = byte4 | byte5 << 8 | byte6 << 16 | byte7 << 24;
  1162. write_mmx64(r, low, high);
  1163. }
  1164. DEFINE_SSE_SPLIT(instr_0F68, safe_read64s, read_mmx64s)
  1165. void instr_660F68(union reg128 source, int32_t r) {
  1166. // punpckhbw xmm, xmm/m128
  1167. // XXX: Aligned access or #gp
  1168. task_switch_test_mmx();
  1169. union reg128 destination = read_xmm128s(r);
  1170. write_xmm128(
  1171. r,
  1172. destination.u8[ 8] | source.u8[ 8] << 8 | destination.u8[ 9] << 16 | source.u8[ 9] << 24,
  1173. destination.u8[10] | source.u8[10] << 8 | destination.u8[11] << 16 | source.u8[11] << 24,
  1174. destination.u8[12] | source.u8[12] << 8 | destination.u8[13] << 16 | source.u8[13] << 24,
  1175. destination.u8[14] | source.u8[14] << 8 | destination.u8[15] << 16 | source.u8[15] << 24
  1176. );
  1177. }
  1178. DEFINE_SSE_SPLIT(instr_660F68, safe_read128s, read_xmm128s)
  1179. void instr_0F69(union reg64 source, int32_t r) {
  1180. // punpckhwd mm, mm/m64
  1181. task_switch_test_mmx();
  1182. union reg64 destination = read_mmx64s(r);
  1183. int32_t word0 = destination.u16[2];
  1184. int32_t word1 = source.u16[2];
  1185. int32_t word2 = destination.u16[3];
  1186. int32_t word3 = source.u16[3];
  1187. int32_t low = word0 | word1 << 16;
  1188. int32_t high = word2 | word3 << 16;
  1189. write_mmx64(r, low, high);
  1190. }
  1191. DEFINE_SSE_SPLIT(instr_0F69, safe_read64s, read_mmx64s)
  1192. void instr_660F69(union reg128 source, int32_t r) {
  1193. // punpckhwd xmm, xmm/m128
  1194. // XXX: Aligned access or #gp
  1195. task_switch_test_mmx();
  1196. union reg128 destination = read_xmm128s(r);
  1197. int32_t dword0 = destination.u16[4] | source.u16[4] << 16;
  1198. int32_t dword1 = destination.u16[5] | source.u16[5] << 16;
  1199. int32_t dword2 = destination.u16[6] | source.u16[6] << 16;
  1200. int32_t dword3 = destination.u16[7] | source.u16[7] << 16;
  1201. write_xmm128(r, dword0, dword1, dword2, dword3);
  1202. }
  1203. DEFINE_SSE_SPLIT(instr_660F69, safe_read128s, read_xmm128s)
  1204. void instr_0F6A(union reg64 source, int32_t r) {
  1205. // punpckhdq mm, mm/m64
  1206. task_switch_test_mmx();
  1207. union reg64 destination = read_mmx64s(r);
  1208. write_mmx64(r, destination.u32[1], source.u32[1]);
  1209. }
  1210. DEFINE_SSE_SPLIT(instr_0F6A, safe_read64s, read_mmx64s)
  1211. void instr_660F6A(union reg128 source, int32_t r) {
  1212. // punpckhdq xmm, xmm/m128
  1213. // XXX: Aligned access or #gp
  1214. task_switch_test_mmx();
  1215. union reg128 destination = read_xmm128s(r);
  1216. write_xmm128(r, destination.u32[2], source.u32[2], destination.u32[3], source.u32[3]);
  1217. }
  1218. DEFINE_SSE_SPLIT(instr_660F6A, safe_read128s, read_xmm128s)
  1219. void instr_0F6B(union reg64 source, int32_t r) {
  1220. // packssdw mm, mm/m64
  1221. task_switch_test_mmx();
  1222. union reg64 destination = read_mmx64s(r);
  1223. int32_t low = saturate_sd_to_sw(destination.u32[0]) |
  1224. saturate_sd_to_sw(destination.u32[1]) << 16;
  1225. int32_t high = saturate_sd_to_sw(source.u32[0]) |
  1226. saturate_sd_to_sw(source.u32[1]) << 16;
  1227. write_mmx64(r, low, high);
  1228. }
  1229. DEFINE_SSE_SPLIT(instr_0F6B, safe_read64s, read_mmx64s)
  1230. void instr_660F6B(union reg128 source, int32_t r) {
  1231. // packssdw xmm, xmm/m128
  1232. // XXX: Aligned access or #gp
  1233. task_switch_test_mmx();
  1234. union reg128 destination = read_xmm128s(r);
  1235. int32_t dword0 = saturate_sd_to_sw(destination.u32[0]) |
  1236. saturate_sd_to_sw(destination.u32[1]) << 16;
  1237. int32_t dword1 = saturate_sd_to_sw(destination.u32[2]) |
  1238. saturate_sd_to_sw(destination.u32[3]) << 16;
  1239. int32_t dword2 = saturate_sd_to_sw(source.u32[0]) |
  1240. saturate_sd_to_sw(source.u32[1]) << 16;
  1241. int32_t dword3 = saturate_sd_to_sw(source.u32[2]) |
  1242. saturate_sd_to_sw(source.u32[3]) << 16;
  1243. write_xmm128(r, dword0, dword1, dword2, dword3);
  1244. }
  1245. DEFINE_SSE_SPLIT(instr_660F6B, safe_read128s, read_xmm128s)
  1246. void instr_0F6C_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1247. void instr_0F6C_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  1248. void instr_660F6C(union reg128 source, int32_t r) {
  1249. // punpcklqdq xmm, xmm/m128
  1250. // XXX: Aligned access or #gp
  1251. task_switch_test_mmx();
  1252. union reg128 destination = read_xmm128s(r);
  1253. write_xmm128(r, destination.u32[0], destination.u32[1], source.u32[0], source.u32[1]);
  1254. }
  1255. DEFINE_SSE_SPLIT(instr_660F6C, safe_read128s, read_xmm128s)
  1256. void instr_0F6D_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1257. void instr_0F6D_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  1258. void instr_660F6D(union reg128 source, int32_t r) {
  1259. // punpckhqdq xmm, xmm/m128
  1260. // XXX: Aligned access or #gp
  1261. task_switch_test_mmx();
  1262. union reg128 destination = read_xmm128s(r);
  1263. write_xmm128(r, destination.u32[2], destination.u32[3], source.u32[2], source.u32[3]);
  1264. }
  1265. DEFINE_SSE_SPLIT(instr_660F6D, safe_read128s, read_xmm128s)
  1266. void instr_0F6E(int32_t source, int32_t r) {
  1267. // movd mm, r/m32
  1268. task_switch_test_mmx();
  1269. write_mmx64(r, source, 0);
  1270. }
  1271. DEFINE_SSE_SPLIT(instr_0F6E, safe_read32s, read_reg32)
  1272. void instr_660F6E(int32_t source, int32_t r) {
  1273. // movd mm, r/m32
  1274. task_switch_test_mmx();
  1275. write_xmm128(r, source, 0, 0, 0);
  1276. }
  1277. DEFINE_SSE_SPLIT(instr_660F6E, safe_read32s, read_reg32)
  1278. void instr_0F6F(union reg64 source, int32_t r) {
  1279. // movq mm, mm/m64
  1280. task_switch_test_mmx();
  1281. write_mmx64(r, source.u32[0], source.u32[1]);
  1282. }
  1283. DEFINE_SSE_SPLIT(instr_0F6F, safe_read64s, read_mmx64s)
  1284. void instr_660F6F(union reg128 source, int32_t r) {
  1285. // movdqa xmm, xmm/mem128
  1286. // XXX: Aligned access or #gp
  1287. // XXX: Aligned read or #gp
  1288. mov_rm_r128(source, r);
  1289. }
  1290. DEFINE_SSE_SPLIT(instr_660F6F, safe_read128s, read_xmm128s)
  1291. void instr_F30F6F(union reg128 source, int32_t r) {
  1292. // movdqu xmm, xmm/m128
  1293. mov_rm_r128(source, r);
  1294. }
  1295. DEFINE_SSE_SPLIT(instr_F30F6F, safe_read128s, read_xmm128s)
  1296. void instr_0F70(union reg64 source, int32_t r, int32_t imm8) {
  1297. // pshufw mm1, mm2/m64, imm8
  1298. task_switch_test_mmx();
  1299. int32_t word0_shift = imm8 & 0b11;
  1300. uint32_t word0 = source.u32[word0_shift >> 1] >> ((word0_shift & 1) << 4) & 0xFFFF;
  1301. int32_t word1_shift = (imm8 >> 2) & 0b11;
  1302. uint32_t word1 = source.u32[word1_shift >> 1] >> ((word1_shift & 1) << 4);
  1303. int32_t low = word0 | word1 << 16;
  1304. int32_t word2_shift = (imm8 >> 4) & 0b11;
  1305. uint32_t word2 = source.u32[word2_shift >> 1] >> ((word2_shift & 1) << 4) & 0xFFFF;
  1306. uint32_t word3_shift = (imm8 >> 6);
  1307. uint32_t word3 = source.u32[word3_shift >> 1] >> ((word3_shift & 1) << 4);
  1308. int32_t high = word2 | word3 << 16;
  1309. write_mmx64(r, low, high);
  1310. }
  1311. DEFINE_SSE_SPLIT_IMM(instr_0F70, safe_read64s, read_mmx64s)
  1312. void instr_660F70(union reg128 source, int32_t r, int32_t imm8) {
  1313. // pshufd xmm, xmm/mem128
  1314. // XXX: Aligned access or #gp
  1315. task_switch_test_mmx();
  1316. write_xmm128(
  1317. r,
  1318. source.u32[imm8 & 3],
  1319. source.u32[imm8 >> 2 & 3],
  1320. source.u32[imm8 >> 4 & 3],
  1321. source.u32[imm8 >> 6 & 3]
  1322. );
  1323. }
  1324. DEFINE_SSE_SPLIT_IMM(instr_660F70, safe_read128s, read_xmm128s)
  1325. void instr_F20F70(union reg128 source, int32_t r, int32_t imm8) {
  1326. // pshuflw xmm, xmm/m128, imm8
  1327. // XXX: Aligned access or #gp
  1328. task_switch_test_mmx();
  1329. write_xmm128(
  1330. r,
  1331. source.u16[imm8 & 3] | source.u16[imm8 >> 2 & 3] << 16,
  1332. source.u16[imm8 >> 4 & 3] | source.u16[imm8 >> 6 & 3] << 16,
  1333. source.u32[2],
  1334. source.u32[3]
  1335. );
  1336. }
  1337. DEFINE_SSE_SPLIT_IMM(instr_F20F70, safe_read128s, read_xmm128s)
  1338. void instr_F30F70(union reg128 source, int32_t r, int32_t imm8) {
  1339. // pshufhw xmm, xmm/m128, imm8
  1340. // XXX: Aligned access or #gp
  1341. task_switch_test_mmx();
  1342. write_xmm128(
  1343. r,
  1344. source.u32[0],
  1345. source.u32[1],
  1346. source.u16[imm8 & 3 | 4] | source.u16[imm8 >> 2 & 3 | 4] << 16,
  1347. source.u16[imm8 >> 4 & 3 | 4] | source.u16[imm8 >> 6 & 3 | 4] << 16
  1348. );
  1349. }
  1350. DEFINE_SSE_SPLIT_IMM(instr_F30F70, safe_read128s, read_xmm128s)
  1351. void instr_0F71_2_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1352. void instr_0F71_4_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1353. void instr_0F71_6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1354. void instr_0F71_2_reg(int32_t r, int32_t imm8) {
  1355. // psrlw mm, imm8
  1356. psrlw_r64(r, imm8);
  1357. }
  1358. void instr_0F71_4_reg(int32_t r, int32_t imm8) {
  1359. // psraw mm, imm8
  1360. psraw_r64(r, imm8);
  1361. }
  1362. void instr_0F71_6_reg(int32_t r, int32_t imm8) {
  1363. // psllw mm, imm8
  1364. psllw_r64(r, imm8);
  1365. }
  1366. void instr_660F71_2_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1367. void instr_660F71_4_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1368. void instr_660F71_6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1369. void instr_660F71_2_reg(int32_t r, int32_t imm8) {
  1370. // psrlw xmm, imm8
  1371. psrlw_r128(r, imm8);
  1372. }
  1373. void instr_660F71_4_reg(int32_t r, int32_t imm8) {
  1374. // psraw xmm, imm8
  1375. psraw_r128(r, imm8);
  1376. }
  1377. void instr_660F71_6_reg(int32_t r, int32_t imm8) {
  1378. // psllw xmm, imm8
  1379. psllw_r128(r, imm8);
  1380. }
  1381. void instr_0F72_2_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1382. void instr_0F72_4_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1383. void instr_0F72_6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1384. void instr_0F72_2_reg(int32_t r, int32_t imm8) {
  1385. // psrld mm, imm8
  1386. psrld_r64(r, imm8);
  1387. }
  1388. void instr_0F72_4_reg(int32_t r, int32_t imm8) {
  1389. // psrad mm, imm8
  1390. psrad_r64(r, imm8);
  1391. }
  1392. void instr_0F72_6_reg(int32_t r, int32_t imm8) {
  1393. // pslld mm, imm8
  1394. pslld_r64(r, imm8);
  1395. }
  1396. void instr_660F72_2_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1397. void instr_660F72_4_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1398. void instr_660F72_6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1399. void instr_660F72_2_reg(int32_t r, int32_t imm8) {
  1400. // psrld xmm, imm8
  1401. psrld_r128(r, imm8);
  1402. }
  1403. void instr_660F72_4_reg(int32_t r, int32_t imm8) {
  1404. // psrad xmm, imm8
  1405. psrad_r128(r, imm8);
  1406. }
  1407. void instr_660F72_6_reg(int32_t r, int32_t imm8) {
  1408. // pslld xmm, imm8
  1409. pslld_r128(r, imm8);
  1410. }
  1411. void instr_0F73_2_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1412. void instr_0F73_3_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1413. void instr_0F73_3_reg(int32_t addr, int32_t r) { trigger_ud(); }
  1414. void instr_0F73_6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1415. void instr_0F73_7_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1416. void instr_0F73_7_reg(int32_t addr, int32_t r) { trigger_ud(); }
  1417. void instr_0F73_2_reg(int32_t r, int32_t imm8) {
  1418. // psrlq mm, imm8
  1419. psrlq_r64(r, imm8);
  1420. }
  1421. void instr_0F73_6_reg(int32_t r, int32_t imm8) {
  1422. // psllq mm, imm8
  1423. psllq_r64(r, imm8);
  1424. }
  1425. void instr_660F73_2_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1426. void instr_660F73_3_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1427. void instr_660F73_6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1428. void instr_660F73_7_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1429. void instr_660F73_2_reg(int32_t r, int32_t imm8) {
  1430. // psrlq xmm, imm8
  1431. psrlq_r128(r, imm8);
  1432. }
  1433. void instr_660F73_3_reg(int32_t r, int32_t imm8) {
  1434. // psrldq xmm, imm8
  1435. task_switch_test_mmx();
  1436. union reg128 destination = read_xmm128s(r);
  1437. if(imm8 == 0)
  1438. {
  1439. return;
  1440. }
  1441. union reg128 result = { { 0 } };
  1442. uint32_t shift = imm8 > 15 ? 128 : imm8 << 3;
  1443. if(shift <= 63)
  1444. {
  1445. result.u64[0] = destination.u64[0] >> shift | destination.u64[1] >> (64 - shift);
  1446. result.u64[1] = destination.u64[1] >> shift;
  1447. }
  1448. else if(shift <= 127)
  1449. {
  1450. result.u64[0] = destination.u64[1] >> (shift - 64);
  1451. result.u64[1] = 0;
  1452. }
  1453. write_xmm_reg128(r, result);
  1454. }
  1455. void instr_660F73_6_reg(int32_t r, int32_t imm8) {
  1456. // psllq xmm, imm8
  1457. psllq_r128(r, imm8);
  1458. }
  1459. void instr_660F73_7_reg(int32_t r, int32_t imm8) {
  1460. // pslldq xmm, imm8
  1461. task_switch_test_mmx();
  1462. union reg128 destination = read_xmm128s(r);
  1463. if(imm8 == 0)
  1464. {
  1465. return;
  1466. }
  1467. union reg128 result = { { 0 } };
  1468. uint32_t shift = imm8 > 15 ? 128 : imm8 << 3;
  1469. if(shift <= 63)
  1470. {
  1471. result.u64[0] = destination.u64[0] << shift;
  1472. result.u64[1] = destination.u64[1] << shift | destination.u64[0] >> (64 - shift);
  1473. }
  1474. else if(shift <= 127)
  1475. {
  1476. result.u64[0] = 0;
  1477. result.u64[1] = destination.u64[0] << (shift - 64);
  1478. }
  1479. write_xmm_reg128(r, result);
  1480. }
  1481. void instr_0F74(union reg64 source, int32_t r) {
  1482. // pcmpeqb mm, mm/m64
  1483. task_switch_test_mmx();
  1484. union reg64 destination = read_mmx64s(r);
  1485. union reg64 result = { { 0 } };
  1486. for(uint32_t i = 0; i < 8; i++)
  1487. {
  1488. result.u8[i] = destination.i8[i] == source.i8[i] ? 0xFF : 0;
  1489. }
  1490. write_mmx_reg64(r, result);
  1491. }
  1492. DEFINE_SSE_SPLIT(instr_0F74, safe_read64s, read_mmx64s)
  1493. void instr_660F74(union reg128 source, int32_t r) {
  1494. // pcmpeqb xmm, xmm/m128
  1495. // XXX: Aligned access or #gp
  1496. task_switch_test_mmx();
  1497. union reg128 destination = read_xmm128s(r);
  1498. union reg128 result;
  1499. for(int32_t i = 0; i < 16; i++)
  1500. {
  1501. result.u8[i] = source.u8[i] == destination.u8[i] ? 0xFF : 0;
  1502. }
  1503. write_xmm_reg128(r, result);
  1504. }
  1505. DEFINE_SSE_SPLIT(instr_660F74, safe_read128s, read_xmm128s)
  1506. void instr_0F75(union reg64 source, int32_t r) {
  1507. // pcmpeqw mm, mm/m64
  1508. task_switch_test_mmx();
  1509. union reg64 destination = read_mmx64s(r);
  1510. int32_t word0 = destination.u16[0] == source.u16[0] ? 0xFFFF : 0;
  1511. int32_t word1 = destination.u16[1] == source.u16[1] ? 0xFFFF : 0;
  1512. int32_t word2 = destination.u16[2] == source.u16[2] ? 0xFFFF : 0;
  1513. int32_t word3 = destination.u16[3] == source.u16[3] ? 0xFFFF : 0;
  1514. int32_t low = word0 | word1 << 16;
  1515. int32_t high = word2 | word3 << 16;
  1516. write_mmx64(r, low, high);
  1517. }
  1518. DEFINE_SSE_SPLIT(instr_0F75, safe_read64s, read_mmx64s)
  1519. void instr_660F75(union reg128 source, int32_t r) {
  1520. // pcmpeqw xmm, xmm/m128
  1521. // XXX: Aligned access or #gp
  1522. task_switch_test_mmx();
  1523. union reg128 destination = read_xmm128s(r);
  1524. union reg128 result;
  1525. for(int32_t i = 0; i < 8; i++)
  1526. {
  1527. result.u16[i] = source.u16[i] == destination.u16[i] ? 0xFFFF : 0;
  1528. }
  1529. write_xmm_reg128(r, result);
  1530. }
  1531. DEFINE_SSE_SPLIT(instr_660F75, safe_read128s, read_xmm128s)
  1532. void instr_0F76(union reg64 source, int32_t r) {
  1533. // pcmpeqd mm, mm/m64
  1534. task_switch_test_mmx();
  1535. union reg64 destination = read_mmx64s(r);
  1536. int32_t low = destination.u32[0] == source.u32[0] ? -1 : 0;
  1537. int32_t high = destination.u32[1] == source.u32[1] ? -1 : 0;
  1538. write_mmx64(r, low, high);
  1539. }
  1540. DEFINE_SSE_SPLIT(instr_0F76, safe_read64s, read_mmx64s)
  1541. void instr_660F76(union reg128 source, int32_t r) {
  1542. // pcmpeqd xmm, xmm/m128
  1543. // XXX: Aligned access or #gp
  1544. task_switch_test_mmx();
  1545. union reg128 destination = read_xmm128s(r);
  1546. write_xmm128(
  1547. r,
  1548. source.u32[0] == destination.u32[0] ? -1 : 0,
  1549. source.u32[1] == destination.u32[1] ? -1 : 0,
  1550. source.u32[2] == destination.u32[2] ? -1 : 0,
  1551. source.u32[3] == destination.u32[3] ? -1 : 0
  1552. );
  1553. }
  1554. DEFINE_SSE_SPLIT(instr_660F76, safe_read128s, read_xmm128s)
  1555. void instr_0F77() {
  1556. // emms
  1557. if(cr[0] & (CR0_EM | CR0_TS)) {
  1558. if(cr[0] & CR0_TS) {
  1559. trigger_nm();
  1560. }
  1561. else {
  1562. trigger_ud();
  1563. }
  1564. }
  1565. fpu_set_tag_word(0xFFFF);
  1566. }
  1567. void instr_0F78() { unimplemented_sse(); }
  1568. void instr_0F79() { unimplemented_sse(); }
  1569. void instr_0F7A() { unimplemented_sse(); }
  1570. void instr_0F7B() { unimplemented_sse(); }
  1571. void instr_0F7C() { unimplemented_sse(); }
  1572. void instr_0F7D() { unimplemented_sse(); }
  1573. int32_t instr_0F7E(int32_t r) {
  1574. // movd r/m32, mm
  1575. task_switch_test_mmx();
  1576. union reg64 data = read_mmx64s(r);
  1577. return data.u32[0];
  1578. }
  1579. DEFINE_SSE_SPLIT_WRITE(instr_0F7E, safe_write32, write_reg32)
  1580. int32_t instr_660F7E(int32_t r) {
  1581. // movd r/m32, xmm
  1582. task_switch_test_mmx();
  1583. union reg64 data = read_xmm64s(r);
  1584. return data.u32[0];
  1585. }
  1586. DEFINE_SSE_SPLIT_WRITE(instr_660F7E, safe_write32, write_reg32)
  1587. void instr_F30F7E_mem(int32_t addr, int32_t r) {
  1588. // movq xmm, xmm/mem64
  1589. task_switch_test_mmx();
  1590. union reg64 data = safe_read64s(addr);
  1591. write_xmm128(r, data.u32[0], data.u32[1], 0, 0);
  1592. }
  1593. void instr_F30F7E_reg(int32_t r1, int32_t r2) {
  1594. // movq xmm, xmm/mem64
  1595. task_switch_test_mmx();
  1596. union reg64 data = read_xmm64s(r1);
  1597. write_xmm128(r2, data.u32[0], data.u32[1], 0, 0);
  1598. }
  1599. void instr_0F7F_mem(int32_t addr, int32_t r) {
  1600. // movq mm/m64, mm
  1601. mov_r_m64(addr, r);
  1602. }
  1603. void instr_0F7F_reg(int32_t r1, int32_t r2) {
  1604. // movq mm/m64, mm
  1605. task_switch_test_mmx();
  1606. union reg64 data = read_mmx64s(r2);
  1607. write_mmx64(r1, data.u32[0], data.u32[1]);
  1608. }
  1609. void instr_660F7F_mem(int32_t addr, int32_t r) {
  1610. // movdqa xmm/m128, xmm
  1611. // XXX: Aligned write or #gp
  1612. mov_r_m128(addr, r);
  1613. }
  1614. void instr_660F7F_reg(int32_t r1, int32_t r2) {
  1615. // movdqa xmm/m128, xmm
  1616. // XXX: Aligned access or #gp
  1617. mov_r_r128(r1, r2);
  1618. }
  1619. void instr_F30F7F_mem(int32_t addr, int32_t r) {
  1620. // movdqu xmm/m128, xmm
  1621. mov_r_m128(addr, r);
  1622. }
  1623. void instr_F30F7F_reg(int32_t r1, int32_t r2) {
  1624. // movdqu xmm/m128, xmm
  1625. mov_r_r128(r1, r2);
  1626. }
  1627. // jmpcc
  1628. void instr16_0F80(int32_t imm) { jmpcc16( test_o(), imm); }
  1629. void instr32_0F80(int32_t imm) { jmpcc32( test_o(), imm); }
  1630. void instr16_0F81(int32_t imm) { jmpcc16(!test_o(), imm); }
  1631. void instr32_0F81(int32_t imm) { jmpcc32(!test_o(), imm); }
  1632. void instr16_0F82(int32_t imm) { jmpcc16( test_b(), imm); }
  1633. void instr32_0F82(int32_t imm) { jmpcc32( test_b(), imm); }
  1634. void instr16_0F83(int32_t imm) { jmpcc16(!test_b(), imm); }
  1635. void instr32_0F83(int32_t imm) { jmpcc32(!test_b(), imm); }
  1636. void instr16_0F84(int32_t imm) { jmpcc16( test_z(), imm); }
  1637. void instr32_0F84(int32_t imm) { jmpcc32( test_z(), imm); }
  1638. void instr16_0F85(int32_t imm) { jmpcc16(!test_z(), imm); }
  1639. void instr32_0F85(int32_t imm) { jmpcc32(!test_z(), imm); }
  1640. void instr16_0F86(int32_t imm) { jmpcc16( test_be(), imm); }
  1641. void instr32_0F86(int32_t imm) { jmpcc32( test_be(), imm); }
  1642. void instr16_0F87(int32_t imm) { jmpcc16(!test_be(), imm); }
  1643. void instr32_0F87(int32_t imm) { jmpcc32(!test_be(), imm); }
  1644. void instr16_0F88(int32_t imm) { jmpcc16( test_s(), imm); }
  1645. void instr32_0F88(int32_t imm) { jmpcc32( test_s(), imm); }
  1646. void instr16_0F89(int32_t imm) { jmpcc16(!test_s(), imm); }
  1647. void instr32_0F89(int32_t imm) { jmpcc32(!test_s(), imm); }
  1648. void instr16_0F8A(int32_t imm) { jmpcc16( test_p(), imm); }
  1649. void instr32_0F8A(int32_t imm) { jmpcc32( test_p(), imm); }
  1650. void instr16_0F8B(int32_t imm) { jmpcc16(!test_p(), imm); }
  1651. void instr32_0F8B(int32_t imm) { jmpcc32(!test_p(), imm); }
  1652. void instr16_0F8C(int32_t imm) { jmpcc16( test_l(), imm); }
  1653. void instr32_0F8C(int32_t imm) { jmpcc32( test_l(), imm); }
  1654. void instr16_0F8D(int32_t imm) { jmpcc16(!test_l(), imm); }
  1655. void instr32_0F8D(int32_t imm) { jmpcc32(!test_l(), imm); }
  1656. void instr16_0F8E(int32_t imm) { jmpcc16( test_le(), imm); }
  1657. void instr32_0F8E(int32_t imm) { jmpcc32( test_le(), imm); }
  1658. void instr16_0F8F(int32_t imm) { jmpcc16(!test_le(), imm); }
  1659. void instr32_0F8F(int32_t imm) { jmpcc32(!test_le(), imm); }
  1660. // setcc
  1661. void instr_0F90_reg(int32_t r, int32_t unused) { setcc_reg( test_o(), r); }
  1662. void instr_0F91_reg(int32_t r, int32_t unused) { setcc_reg(!test_o(), r); }
  1663. void instr_0F92_reg(int32_t r, int32_t unused) { setcc_reg( test_b(), r); }
  1664. void instr_0F93_reg(int32_t r, int32_t unused) { setcc_reg(!test_b(), r); }
  1665. void instr_0F94_reg(int32_t r, int32_t unused) { setcc_reg( test_z(), r); }
  1666. void instr_0F95_reg(int32_t r, int32_t unused) { setcc_reg(!test_z(), r); }
  1667. void instr_0F96_reg(int32_t r, int32_t unused) { setcc_reg( test_be(), r); }
  1668. void instr_0F97_reg(int32_t r, int32_t unused) { setcc_reg(!test_be(), r); }
  1669. void instr_0F98_reg(int32_t r, int32_t unused) { setcc_reg( test_s(), r); }
  1670. void instr_0F99_reg(int32_t r, int32_t unused) { setcc_reg(!test_s(), r); }
  1671. void instr_0F9A_reg(int32_t r, int32_t unused) { setcc_reg( test_p(), r); }
  1672. void instr_0F9B_reg(int32_t r, int32_t unused) { setcc_reg(!test_p(), r); }
  1673. void instr_0F9C_reg(int32_t r, int32_t unused) { setcc_reg( test_l(), r); }
  1674. void instr_0F9D_reg(int32_t r, int32_t unused) { setcc_reg(!test_l(), r); }
  1675. void instr_0F9E_reg(int32_t r, int32_t unused) { setcc_reg( test_le(), r); }
  1676. void instr_0F9F_reg(int32_t r, int32_t unused) { setcc_reg(!test_le(), r); }
  1677. void instr_0F90_mem(int32_t addr, int32_t unused) { setcc_mem( test_o(), addr); }
  1678. void instr_0F91_mem(int32_t addr, int32_t unused) { setcc_mem(!test_o(), addr); }
  1679. void instr_0F92_mem(int32_t addr, int32_t unused) { setcc_mem( test_b(), addr); }
  1680. void instr_0F93_mem(int32_t addr, int32_t unused) { setcc_mem(!test_b(), addr); }
  1681. void instr_0F94_mem(int32_t addr, int32_t unused) { setcc_mem( test_z(), addr); }
  1682. void instr_0F95_mem(int32_t addr, int32_t unused) { setcc_mem(!test_z(), addr); }
  1683. void instr_0F96_mem(int32_t addr, int32_t unused) { setcc_mem( test_be(), addr); }
  1684. void instr_0F97_mem(int32_t addr, int32_t unused) { setcc_mem(!test_be(), addr); }
  1685. void instr_0F98_mem(int32_t addr, int32_t unused) { setcc_mem( test_s(), addr); }
  1686. void instr_0F99_mem(int32_t addr, int32_t unused) { setcc_mem(!test_s(), addr); }
  1687. void instr_0F9A_mem(int32_t addr, int32_t unused) { setcc_mem( test_p(), addr); }
  1688. void instr_0F9B_mem(int32_t addr, int32_t unused) { setcc_mem(!test_p(), addr); }
  1689. void instr_0F9C_mem(int32_t addr, int32_t unused) { setcc_mem( test_l(), addr); }
  1690. void instr_0F9D_mem(int32_t addr, int32_t unused) { setcc_mem(!test_l(), addr); }
  1691. void instr_0F9E_mem(int32_t addr, int32_t unused) { setcc_mem( test_le(), addr); }
  1692. void instr_0F9F_mem(int32_t addr, int32_t unused) { setcc_mem(!test_le(), addr); }
  1693. void instr16_0FA0() { push16(sreg[FS]); }
  1694. void instr32_0FA0() { push32(sreg[FS]); }
  1695. void instr16_0FA1() {
  1696. switch_seg(FS, safe_read16(get_stack_pointer(0)));
  1697. adjust_stack_reg(2);
  1698. }
  1699. void instr32_0FA1() {
  1700. switch_seg(FS, safe_read32s(get_stack_pointer(0)) & 0xFFFF);
  1701. adjust_stack_reg(4);
  1702. }
  // 0F A2: delegates entirely to cpuid().
  void instr_0FA2() {
      cpuid();
  }
  1704. void instr16_0FA3_reg(int32_t r1, int32_t r2) { bt_reg(read_reg16(r1), read_reg16(r2) & 15); }
  1705. void instr16_0FA3_mem(int32_t addr, int32_t r) { bt_mem(addr, read_reg16(r) << 16 >> 16); }
  1706. void instr32_0FA3_reg(int32_t r1, int32_t r2) { bt_reg(read_reg32(r1), read_reg32(r2) & 31); }
  1707. void instr32_0FA3_mem(int32_t addr, int32_t r) { bt_mem(addr, read_reg32(r)); }
  1708. DEFINE_MODRM_INSTR_IMM_READ_WRITE_16(instr16_0FA4, shld16(___, read_reg16(r), imm & 31))
  1709. DEFINE_MODRM_INSTR_IMM_READ_WRITE_32(instr32_0FA4, shld32(___, read_reg32(r), imm & 31))
  1710. DEFINE_MODRM_INSTR_READ_WRITE_16(instr16_0FA5, shld16(___, read_reg16(r), reg8[CL] & 31))
  1711. DEFINE_MODRM_INSTR_READ_WRITE_32(instr32_0FA5, shld32(___, read_reg32(r), reg8[CL] & 31))
  // 0F A6: obsolete cmpxchg encoding (os/2); always raises #UD.
  void instr_0FA6() {
      trigger_ud();
  }
  // 0F A7: handled by undefined_instruction().
  void instr_0FA7() {
      undefined_instruction();
  }
  1717. void instr16_0FA8() { push16(sreg[GS]); }
  1718. void instr32_0FA8() { push32(sreg[GS]); }
  1719. void instr16_0FA9() {
  1720. switch_seg(GS, safe_read16(get_stack_pointer(0)));
  1721. adjust_stack_reg(2);
  1722. }
  1723. void instr32_0FA9() {
  1724. switch_seg(GS, safe_read32s(get_stack_pointer(0)) & 0xFFFF);
  1725. adjust_stack_reg(4);
  1726. }
  // 0F AA (rsm): not implemented; reports through todo().
  void instr_0FAA()
  {
      todo();
  }
  1731. void instr16_0FAB_reg(int32_t r1, int32_t r2) { write_reg16(r1, bts_reg(read_reg16(r1), read_reg16(r2) & 15)); }
  1732. void instr16_0FAB_mem(int32_t addr, int32_t r) { bts_mem(addr, read_reg16(r) << 16 >> 16); }
  1733. void instr32_0FAB_reg(int32_t r1, int32_t r2) { write_reg32(r1, bts_reg(read_reg32(r1), read_reg32(r2) & 31)); }
  1734. void instr32_0FAB_mem(int32_t addr, int32_t r) { bts_mem(addr, read_reg32(r)); }
  1735. DEFINE_MODRM_INSTR_IMM_READ_WRITE_16(instr16_0FAC, shrd16(___, read_reg16(r), imm & 31))
  1736. DEFINE_MODRM_INSTR_IMM_READ_WRITE_32(instr32_0FAC, shrd32(___, read_reg32(r), imm & 31))
  1737. DEFINE_MODRM_INSTR_READ_WRITE_16(instr16_0FAD, shrd16(___, read_reg16(r), reg8[CL] & 31))
  1738. DEFINE_MODRM_INSTR_READ_WRITE_32(instr32_0FAD, shrd32(___, read_reg32(r), reg8[CL] & 31))
  1739. void instr_0FAE_0_reg(int32_t r) { trigger_ud(); }
  1740. void instr_0FAE_0_mem(int32_t addr) {
  1741. fxsave(addr);
  1742. }
  1743. void instr_0FAE_1_reg(int32_t r) { trigger_ud(); }
  1744. void instr_0FAE_1_mem(int32_t addr) {
  1745. fxrstor(addr);
  1746. }
  1747. void instr_0FAE_2_reg(int32_t r) { trigger_ud(); }
  1748. void instr_0FAE_2_mem(int32_t addr) {
  1749. // ldmxcsr
  1750. int32_t new_mxcsr = safe_read32s(addr);
  1751. if(new_mxcsr & ~MXCSR_MASK)
  1752. {
  1753. dbg_log("Invalid mxcsr bits: %x", (new_mxcsr & ~MXCSR_MASK));
  1754. assert(false);
  1755. trigger_gp(0);
  1756. }
  1757. *mxcsr = new_mxcsr;
  1758. }
  1759. void instr_0FAE_3_reg(int32_t r) { trigger_ud(); }
  1760. void instr_0FAE_3_mem(int32_t addr) {
  1761. // stmxcsr
  1762. safe_write32(addr, *mxcsr);
  1763. }
  1764. void instr_0FAE_4_reg(int32_t r) { trigger_ud(); }
  1765. void instr_0FAE_4_mem(int32_t addr) {
  1766. // xsave
  1767. todo();
  1768. }
  1769. void instr_0FAE_5_reg(int32_t r) {
  1770. // lfence
  1771. dbg_assert_message(r == 0, "Unexpected lfence encoding");
  1772. }
  1773. void instr_0FAE_5_mem(int32_t addr) {
  1774. // xrstor
  1775. todo();
  1776. }
  1777. void instr_0FAE_6_reg(int32_t r) {
  1778. // mfence
  1779. dbg_assert_message(r == 0, "Unexpected mfence encoding");
  1780. }
  // 0F AE /6 with a memory operand is not supported here: report the
  // unexpected encoding through dbg_assert_message, then raise #UD.
  void instr_0FAE_6_mem(int32_t addr) {
      // The message names /6, matching this handler; /5 is a separate
      // handler (instr_0FAE_5_mem) and must not be blamed in the log.
      dbg_assert_message(false, "0fae/6 #ud");
      trigger_ud();
  }
  1785. void instr_0FAE_7_reg(int32_t r) {
  1786. // sfence
  1787. dbg_assert_message(r == 0, "Unexpected sfence encoding");
  1788. }
  1789. void instr_0FAE_7_mem(int32_t addr) {
  1790. // clflush
  1791. todo();
  1792. }
  1793. DEFINE_MODRM_INSTR_READ16(instr16_0FAF, write_reg16(r, imul_reg16(read_reg16(r) << 16 >> 16, ___ << 16 >> 16)))
  1794. DEFINE_MODRM_INSTR_READ32(instr32_0FAF, write_reg32(r, imul_reg32(read_reg32(r), ___)))
  1795. void instr_0FB0_reg(int32_t r1, int32_t r2) {
  1796. // cmpxchg8
  1797. int32_t data = read_reg8(r1);
  1798. cmp8(reg8[AL], data);
  1799. if(getzf())
  1800. {
  1801. write_reg8(r1, read_reg8(r2));
  1802. }
  1803. else
  1804. {
  1805. reg8[AL] = data;
  1806. }
  1807. }
  1808. void instr_0FB0_mem(int32_t addr, int32_t r) {
  1809. // cmpxchg8
  1810. writable_or_pagefault(addr, 1);
  1811. int32_t data = safe_read8(addr);
  1812. cmp8(reg8[AL], data);
  1813. if(getzf())
  1814. {
  1815. safe_write8(addr, read_reg8(r));
  1816. }
  1817. else
  1818. {
  1819. safe_write8(addr, data);
  1820. reg8[AL] = data;
  1821. }
  1822. }
  1823. void instr16_0FB1_reg(int32_t r1, int32_t r2) {
  1824. // cmpxchg16
  1825. int32_t data = read_reg16(r1);
  1826. cmp16(reg16[AX], data);
  1827. if(getzf())
  1828. {
  1829. write_reg16(r1, read_reg16(r2));
  1830. }
  1831. else
  1832. {
  1833. reg16[AX] = data;
  1834. }
  1835. }
  1836. void instr16_0FB1_mem(int32_t addr, int32_t r) {
  1837. // cmpxchg16
  1838. writable_or_pagefault(addr, 2);
  1839. int32_t data = safe_read16(addr);
  1840. cmp16(reg16[AX], data);
  1841. if(getzf())
  1842. {
  1843. safe_write16(addr, read_reg16(r));
  1844. }
  1845. else
  1846. {
  1847. safe_write16(addr, data);
  1848. reg16[AX] = data;
  1849. }
  1850. }
  1851. void instr32_0FB1_reg(int32_t r1, int32_t r2) {
  1852. // cmpxchg32
  1853. int32_t data = read_reg32(r1);
  1854. cmp32(reg32s[EAX], data);
  1855. if(getzf())
  1856. {
  1857. write_reg32(r1, read_reg32(r2));
  1858. }
  1859. else
  1860. {
  1861. reg32s[EAX] = data;
  1862. }
  1863. }
  1864. void instr32_0FB1_mem(int32_t addr, int32_t r) {
  1865. // cmpxchg32
  1866. writable_or_pagefault(addr, 4);
  1867. int32_t data = safe_read32s(addr);
  1868. cmp32(reg32s[EAX], data);
  1869. if(getzf())
  1870. {
  1871. safe_write32(addr, read_reg32(r));
  1872. }
  1873. else
  1874. {
  1875. safe_write32(addr, data);
  1876. reg32s[EAX] = data;
  1877. }
  1878. }
  1879. // lss
  1880. void instr16_0FB2_reg(int32_t unused, int32_t unused2) { trigger_ud(); }
  1881. void instr16_0FB2_mem(int32_t addr, int32_t r) {
  1882. lss16(addr, get_reg16_index(r), SS);
  1883. }
  1884. void instr32_0FB2_reg(int32_t unused, int32_t unused2) { trigger_ud(); }
  1885. void instr32_0FB2_mem(int32_t addr, int32_t r) {
  1886. lss32(addr, r, SS);
  1887. }
  // 0F B3 (btr). Register forms mask the bit index to the operand width and
  // write the value returned by btr_reg back to r1; memory forms pass the
  // register value on as a signed bit offset.
  void instr16_0FB3_reg(int32_t r1, int32_t r2) {
      int32_t bit = read_reg16(r2) & 15;
      write_reg16(r1, btr_reg(read_reg16(r1), bit));
  }
  void instr16_0FB3_mem(int32_t addr, int32_t r) {
      // sign-extend the 16-bit register value
      int32_t bit_offset = read_reg16(r) << 16 >> 16;
      btr_mem(addr, bit_offset);
  }
  void instr32_0FB3_reg(int32_t r1, int32_t r2) {
      int32_t bit = read_reg32(r2) & 31;
      write_reg32(r1, btr_reg(read_reg32(r1), bit));
  }
  void instr32_0FB3_mem(int32_t addr, int32_t r) {
      int32_t bit_offset = read_reg32(r);
      btr_mem(addr, bit_offset);
  }
  1892. // lfs, lgs
  1893. void instr16_0FB4_reg(int32_t unused, int32_t unused2) { trigger_ud(); }
  1894. void instr16_0FB4_mem(int32_t addr, int32_t r) {
  1895. lss16(addr, get_reg16_index(r), FS);
  1896. }
  1897. void instr32_0FB4_reg(int32_t unused, int32_t unused2) { trigger_ud(); }
  1898. void instr32_0FB4_mem(int32_t addr, int32_t r) {
  1899. lss32(addr, r, FS);
  1900. }
  1901. void instr16_0FB5_reg(int32_t unused, int32_t unused2) { trigger_ud(); }
  1902. void instr16_0FB5_mem(int32_t addr, int32_t r) {
  1903. lss16(addr, get_reg16_index(r), GS);
  1904. }
  1905. void instr32_0FB5_reg(int32_t unused, int32_t unused2) { trigger_ud(); }
  1906. void instr32_0FB5_mem(int32_t addr, int32_t r) {
  1907. lss32(addr, r, GS);
  1908. }
  1909. // movzx
  1910. DEFINE_MODRM_INSTR_READ8(instr16_0FB6, write_reg16(r, ___))
  1911. DEFINE_MODRM_INSTR_READ8(instr32_0FB6, write_reg32(r, ___))
  1912. DEFINE_MODRM_INSTR_READ16(instr16_0FB7, write_reg16(r, ___))
  1913. DEFINE_MODRM_INSTR_READ16(instr32_0FB7, write_reg32(r, ___))
  1914. void instr16_0FB8_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  1915. void instr16_0FB8_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1916. DEFINE_MODRM_INSTR_READ16(instr16_F30FB8, write_reg16(r, popcnt(___)))
  1917. void instr32_0FB8_reg(int32_t r1, int32_t r2) { trigger_ud(); }
  1918. void instr32_0FB8_mem(int32_t addr, int32_t r) { trigger_ud(); }
  1919. DEFINE_MODRM_INSTR_READ32(instr32_F30FB8, write_reg32(r, popcnt(___)))
  // 0F B9 (UD2): unconditionally raises #UD.
  void instr_0FB9()
  {
      trigger_ud();
  }
  // 0F BA /4../7, 16-bit operand size: bt, bts, btr and btc with an immediate
  // bit index. The immediate is masked to 4 bits for both register and memory
  // forms; the modifying register forms write the helper's result back to r.
  void instr16_0FBA_4_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 15;
      bt_reg(read_reg16(r), bit);
  }
  void instr16_0FBA_4_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 15;
      bt_mem(addr, bit);
  }
  void instr16_0FBA_5_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 15;
      write_reg16(r, bts_reg(read_reg16(r), bit));
  }
  void instr16_0FBA_5_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 15;
      bts_mem(addr, bit);
  }
  void instr16_0FBA_6_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 15;
      write_reg16(r, btr_reg(read_reg16(r), bit));
  }
  void instr16_0FBA_6_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 15;
      btr_mem(addr, bit);
  }
  void instr16_0FBA_7_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 15;
      write_reg16(r, btc_reg(read_reg16(r), bit));
  }
  void instr16_0FBA_7_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 15;
      btc_mem(addr, bit);
  }
  // 0F BA /4../7, 32-bit operand size: bt, bts, btr and btc with an immediate
  // bit index. The immediate is masked to 5 bits for both register and memory
  // forms; the modifying register forms write the helper's result back to r.
  void instr32_0FBA_4_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 31;
      bt_reg(read_reg32(r), bit);
  }
  void instr32_0FBA_4_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 31;
      bt_mem(addr, bit);
  }
  void instr32_0FBA_5_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 31;
      write_reg32(r, bts_reg(read_reg32(r), bit));
  }
  void instr32_0FBA_5_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 31;
      bts_mem(addr, bit);
  }
  void instr32_0FBA_6_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 31;
      write_reg32(r, btr_reg(read_reg32(r), bit));
  }
  void instr32_0FBA_6_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 31;
      btr_mem(addr, bit);
  }
  void instr32_0FBA_7_reg(int32_t r, int32_t imm) {
      int32_t bit = imm & 31;
      write_reg32(r, btc_reg(read_reg32(r), bit));
  }
  void instr32_0FBA_7_mem(int32_t addr, int32_t imm) {
      int32_t bit = imm & 31;
      btc_mem(addr, bit);
  }
  // 0F BB (btc). Register forms mask the bit index to the operand width and
  // write the value returned by btc_reg back to r1; memory forms pass the
  // register value on as a signed bit offset.
  void instr16_0FBB_reg(int32_t r1, int32_t r2) {
      int32_t bit = read_reg16(r2) & 15;
      write_reg16(r1, btc_reg(read_reg16(r1), bit));
  }
  void instr16_0FBB_mem(int32_t addr, int32_t r) {
      // sign-extend the 16-bit register value
      int32_t bit_offset = read_reg16(r) << 16 >> 16;
      btc_mem(addr, bit_offset);
  }
  void instr32_0FBB_reg(int32_t r1, int32_t r2) {
      int32_t bit = read_reg32(r2) & 31;
      write_reg32(r1, btc_reg(read_reg32(r1), bit));
  }
  void instr32_0FBB_mem(int32_t addr, int32_t r) {
      int32_t bit_offset = read_reg32(r);
      btc_mem(addr, bit_offset);
  }
  1976. DEFINE_MODRM_INSTR_READ16(instr16_0FBC, write_reg16(r, bsf16(read_reg16(r), ___)))
  1977. DEFINE_MODRM_INSTR_READ32(instr32_0FBC, write_reg32(r, bsf32(read_reg32(r), ___)))
  1978. DEFINE_MODRM_INSTR_READ16(instr16_0FBD, write_reg16(r, bsr16(read_reg16(r), ___)))
  1979. DEFINE_MODRM_INSTR_READ32(instr32_0FBD, write_reg32(r, bsr32(read_reg32(r), ___)))
  1980. // movsx
  1981. DEFINE_MODRM_INSTR_READ8(instr16_0FBE, write_reg16(r, ___ << 24 >> 24))
  1982. DEFINE_MODRM_INSTR_READ8(instr32_0FBE, write_reg32(r, ___ << 24 >> 24))
  1983. DEFINE_MODRM_INSTR_READ16(instr16_0FBF, write_reg16(r, ___ << 16 >> 16))
  1984. DEFINE_MODRM_INSTR_READ16(instr32_0FBF, write_reg32(r, ___ << 16 >> 16))
  1985. DEFINE_MODRM_INSTR_READ_WRITE_8(instr_0FC0, xadd8(___, get_reg8_index(r)))
  1986. DEFINE_MODRM_INSTR_READ_WRITE_16(instr16_0FC1, xadd16(___, get_reg16_index(r)))
  1987. DEFINE_MODRM_INSTR_READ_WRITE_32(instr32_0FC1, xadd32(___, r))
  // 0F C2: not implemented; reports through unimplemented_sse().
  void instr_0FC2() {
      unimplemented_sse();
  }
  // 0F C3: the register encoding raises #UD; the memory encoding (movnti)
  // stores the 32-bit register r at addr.
  void instr_0FC3_reg(int32_t r1, int32_t r2) {
      trigger_ud();
  }
  void instr_0FC3_mem(int32_t addr, int32_t r) {
      int32_t value = read_reg32(r);
      safe_write32(addr, value);
  }
  1994. void instr_0FC4(int32_t source, int32_t r, int32_t imm8) {
  1995. // pinsrw mm, r32/m16, imm8
  1996. task_switch_test_mmx();
  1997. union reg64 destination = read_mmx64s(r);
  1998. uint32_t index = imm8 & 3;
  1999. destination.u16[index] = source & 0xffff;
  2000. write_mmx_reg64(r, destination);
  2001. }
  2002. DEFINE_SSE_SPLIT_IMM(instr_0FC4, read16, read_reg32)
  2003. void instr_660FC4(int32_t source, int32_t r, int32_t imm8) {
  2004. // pinsrw xmm, r32/m16, imm8
  2005. task_switch_test_mmx();
  2006. union reg128 destination = read_xmm128s(r);
  2007. uint32_t index = imm8 & 7;
  2008. destination.u16[index] = source & 0xffff;
  2009. write_xmm_reg128(r, destination);
  2010. }
  2011. DEFINE_SSE_SPLIT_IMM(instr_660FC4, read16, read_reg32)
  2012. void instr_0FC5_mem(int32_t addr, int32_t r, int32_t imm8) { trigger_ud(); }
  2013. void instr_0FC5_reg(int32_t r1, int32_t r2, int32_t imm8) {
  2014. // pextrw r32, mm, imm8
  2015. task_switch_test_mmx();
  2016. union reg64 data = read_mmx64s(r1);
  2017. uint32_t index = imm8 & 3;
  2018. uint32_t result = data.u16[index];
  2019. write_reg32(r2, result);
  2020. }
  2021. void instr_660FC5_mem(int32_t addr, int32_t r, int32_t imm8) { trigger_ud(); }
  2022. void instr_660FC5_reg(int32_t r1, int32_t r2, int32_t imm8) {
  2023. // pextrw r32, xmm, imm8
  2024. task_switch_test_mmx();
  2025. union reg128 data = read_xmm128s(r1);
  2026. uint32_t index = imm8 & 7;
  2027. uint32_t result = data.u16[index];
  2028. write_reg32(r2, result);
  2029. }
  // 0F C6: not implemented; reports through unimplemented_sse().
  void instr_0FC6() {
      unimplemented_sse();
  }
  2031. void instr_0FC7_1_reg(int32_t r) { trigger_ud(); }
  2032. void instr_0FC7_1_mem(int32_t addr) {
  2033. // cmpxchg8b
  2034. writable_or_pagefault(addr, 8);
  2035. int32_t m64_low = safe_read32s(addr);
  2036. int32_t m64_high = safe_read32s(addr + 4);
  2037. if(reg32s[EAX] == m64_low &&
  2038. reg32s[EDX] == m64_high)
  2039. {
  2040. flags[0] |= FLAG_ZERO;
  2041. safe_write32(addr, reg32s[EBX]);
  2042. safe_write32(addr + 4, reg32s[ECX]);
  2043. }
  2044. else
  2045. {
  2046. flags[0] &= ~FLAG_ZERO;
  2047. reg32s[EAX] = m64_low;
  2048. reg32s[EDX] = m64_high;
  2049. safe_write32(addr, m64_low);
  2050. safe_write32(addr + 4, m64_high);
  2051. }
  2052. flags_changed[0] &= ~FLAG_ZERO;
  2053. }
  2054. void instr_0FC7_6_reg(int32_t r) {
  2055. // rdrand
  2056. int32_t has_rand = has_rand_int();
  2057. int32_t rand = 0;
  2058. if(has_rand)
  2059. {
  2060. rand = get_rand_int();
  2061. }
  2062. write_reg_osize(r, rand);
  2063. flags[0] &= ~FLAGS_ALL;
  2064. flags[0] |= has_rand;
  2065. flags_changed[0] = 0;
  2066. }
  // 0F C7 /6 with a memory operand: reports through todo(), then raises #UD.
  void instr_0FC7_6_mem(int32_t addr)
  {
      todo();
      trigger_ud();
  }
  2071. void instr_0FC8() { bswap(EAX); }
  2072. void instr_0FC9() { bswap(ECX); }
  2073. void instr_0FCA() { bswap(EDX); }
  2074. void instr_0FCB() { bswap(EBX); }
  2075. void instr_0FCC() { bswap(ESP); }
  2076. void instr_0FCD() { bswap(EBP); }
  2077. void instr_0FCE() { bswap(ESI); }
  2078. void instr_0FCF() { bswap(EDI); }
  // 0F D0: not implemented; reports through unimplemented_sse().
  void instr_0FD0() {
      unimplemented_sse();
  }
  2080. void instr_0FD1(union reg64 source, int32_t r) {
  2081. // psrlw mm, mm/m64
  2082. psrlw_r64(r, source.u32[0]);
  2083. }
  2084. DEFINE_SSE_SPLIT(instr_0FD1, safe_read64s, read_mmx64s)
  2085. void instr_660FD1(union reg128 source, int32_t r) {
  2086. // psrlw xmm, xmm/m128
  2087. // XXX: Aligned access or #gp
  2088. psrlw_r128(r, source.u32[0]);
  2089. }
  2090. DEFINE_SSE_SPLIT(instr_660FD1, safe_read128s, read_xmm128s)
  2091. void instr_0FD2(union reg64 source, int32_t r) {
  2092. // psrld mm, mm/m64
  2093. psrld_r64(r, source.u32[0]);
  2094. }
  2095. DEFINE_SSE_SPLIT(instr_0FD2, safe_read64s, read_mmx64s)
  2096. void instr_660FD2(union reg128 source, int32_t r) {
  2097. // psrld xmm, xmm/m128
  2098. // XXX: Aligned access or #gp
  2099. psrld_r128(r, source.u32[0]);
  2100. }
  2101. DEFINE_SSE_SPLIT(instr_660FD2, safe_read128s, read_xmm128s)
  2102. void instr_0FD3(union reg64 source, int32_t r) {
  2103. // psrlq mm, mm/m64
  2104. psrlq_r64(r, source.u32[0]);
  2105. }
  2106. DEFINE_SSE_SPLIT(instr_0FD3, safe_read64s, read_mmx64s)
  2107. void instr_660FD3(union reg128 source, int32_t r) {
  2108. // psrlq xmm, mm/m64
  2109. psrlq_r128(r, source.u32[0]);
  2110. }
  2111. DEFINE_SSE_SPLIT(instr_660FD3, safe_read128s, read_xmm128s)
  2112. void instr_0FD4(union reg64 source, int32_t r) {
  2113. // paddq mm, mm/m64
  2114. task_switch_test_mmx();
  2115. union reg64 destination = read_mmx64s(r);
  2116. destination.u64[0] += source.u64[0];
  2117. write_mmx_reg64(r, destination);
  2118. }
  2119. DEFINE_SSE_SPLIT(instr_0FD4, safe_read64s, read_mmx64s)
  2120. void instr_660FD4(union reg128 source, int32_t r) {
  2121. // paddq xmm, xmm/m128
  2122. // XXX: Aligned access or #gp
  2123. task_switch_test_mmx();
  2124. union reg128 destination = read_xmm128s(r);
  2125. destination.u64[0] += source.u64[0];
  2126. destination.u64[1] += source.u64[1];
  2127. write_xmm_reg128(r, destination);
  2128. }
  2129. DEFINE_SSE_SPLIT(instr_660FD4, safe_read128s, read_xmm128s)
  2130. void instr_0FD5(union reg64 source, int32_t r) {
  2131. // pmullw mm, mm/m64
  2132. task_switch_test_mmx();
  2133. union reg64 destination = read_mmx64s(r);
  2134. int32_t word0 = (destination.u16[0] * source.u16[0]) & 0xFFFF;
  2135. int32_t word1 = (destination.u16[1] * source.u16[1]) & 0xFFFF;
  2136. int32_t word2 = (destination.u16[2] * source.u16[2]) & 0xFFFF;
  2137. int32_t word3 = (destination.u16[3] * source.u16[3]) & 0xFFFF;
  2138. int32_t low = word0 | word1 << 16;
  2139. int32_t high = word2 | word3 << 16;
  2140. write_mmx64(r, low, high);
  2141. }
  2142. DEFINE_SSE_SPLIT(instr_0FD5, safe_read64s, read_mmx64s)
  2143. void instr_660FD5(union reg128 source, int32_t r) {
  2144. // pmullw xmm, xmm/m128
  2145. // XXX: Aligned access or #gp
  2146. task_switch_test_mmx();
  2147. union reg128 destination = read_xmm128s(r);
  2148. write_xmm128(
  2149. r,
  2150. source.u16[0] * destination.u16[0] & 0xFFFF | source.u16[1] * destination.u16[1] << 16,
  2151. source.u16[2] * destination.u16[2] & 0xFFFF | source.u16[3] * destination.u16[3] << 16,
  2152. source.u16[4] * destination.u16[4] & 0xFFFF | source.u16[5] * destination.u16[5] << 16,
  2153. source.u16[6] * destination.u16[6] & 0xFFFF | source.u16[7] * destination.u16[7] << 16
  2154. );
  2155. }
  2156. DEFINE_SSE_SPLIT(instr_660FD5, safe_read128s, read_xmm128s)
  // 0F D6 without a prefix raises #UD in both the memory and register form.
  void instr_0FD6_mem(int32_t addr, int32_t r) {
      trigger_ud();
  }
  void instr_0FD6_reg(int32_t r1, int32_t r2) {
      trigger_ud();
  }
  2159. void instr_660FD6_mem(int32_t addr, int32_t r) {
  2160. // movq xmm/m64, xmm
  2161. movl_r128_m64(addr, r);
  2162. }
  2163. void instr_660FD6_reg(int32_t r1, int32_t r2) {
  2164. // movq xmm/m64, xmm
  2165. task_switch_test_mmx();
  2166. union reg64 data = read_xmm64s(r2);
  2167. write_xmm128(r1, data.u32[0], data.u32[1], 0, 0);
  2168. }
  2169. void instr_F20FD6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  2170. void instr_F20FD6_reg(int32_t r1, int32_t r2) {
  2171. // movdq2q mm, xmm
  2172. task_switch_test_mmx();
  2173. union reg128 source = read_xmm128s(r1);
  2174. write_mmx64(r2, source.u32[0], source.u32[1]);
  2175. }
  2176. void instr_F30FD6_mem(int32_t addr, int32_t r) { trigger_ud(); }
  2177. void instr_F30FD6_reg(int32_t r1, int32_t r2) {
  2178. // movq2dq xmm, mm
  2179. task_switch_test_mmx();
  2180. union reg64 source = read_mmx64s(r1);
  2181. write_xmm128(r2, source.u32[0], source.u32[1], 0, 0);
  2182. }
  2183. void instr_0FD7_mem(int32_t addr, int32_t r) { trigger_ud(); }
  2184. void instr_0FD7_reg(int32_t r1, int32_t r2) {
  2185. // pmovmskb r, mm
  2186. task_switch_test_mmx();
  2187. union reg64 x = read_mmx64s(r1);
  2188. uint32_t result =
  2189. x.u8[0] >> 7 << 0 | x.u8[1] >> 7 << 1 | x.u8[2] >> 7 << 2 | x.u8[3] >> 7 << 3 |
  2190. x.u8[4] >> 7 << 4 | x.u8[5] >> 7 << 5 | x.u8[6] >> 7 << 6 | x.u8[7] >> 7 << 7;
  2191. write_reg32(r2, result);
  2192. }
  2193. void instr_660FD7_mem(int32_t addr, int32_t r) { trigger_ud(); }
  2194. void instr_660FD7_reg(int32_t r1, int32_t r2) {
  2195. // pmovmskb reg, xmm
  2196. task_switch_test_mmx();
  2197. union reg128 x = read_xmm128s(r1);
  2198. int32_t result =
  2199. x.u8[0] >> 7 << 0 | x.u8[1] >> 7 << 1 | x.u8[2] >> 7 << 2 | x.u8[3] >> 7 << 3 |
  2200. x.u8[4] >> 7 << 4 | x.u8[5] >> 7 << 5 | x.u8[6] >> 7 << 6 | x.u8[7] >> 7 << 7 |
  2201. x.u8[8] >> 7 << 8 | x.u8[9] >> 7 << 9 | x.u8[10] >> 7 << 10 | x.u8[11] >> 7 << 11 |
  2202. x.u8[12] >> 7 << 12 | x.u8[13] >> 7 << 13 | x.u8[14] >> 7 << 14 | x.u8[15] >> 7 << 15;
  2203. write_reg32(r2, result);
  2204. }
  2205. void instr_0FD8(union reg64 source, int32_t r) {
  2206. // psubusb mm, mm/m64
  2207. task_switch_test_mmx();
  2208. union reg64 destination = read_mmx64s(r);
  2209. union reg64 result = { { 0 } };
  2210. for(uint32_t i = 0; i < 8; i++)
  2211. {
  2212. result.u8[i] = saturate_sd_to_ub(destination.u8[i] - source.u8[i]);
  2213. }
  2214. write_mmx_reg64(r, result);
  2215. }
  2216. DEFINE_SSE_SPLIT(instr_0FD8, safe_read64s, read_mmx64s)
  2217. void instr_660FD8(union reg128 source, int32_t r) {
  2218. // psubusb xmm, xmm/m128
  2219. task_switch_test_mmx();
  2220. union reg128 destination = read_xmm128s(r);
  2221. union reg128 result;
  2222. for(uint32_t i = 0; i < 16; i++)
  2223. {
  2224. result.u8[i] = saturate_sd_to_ub(destination.u8[i] - source.u8[i]);
  2225. }
  2226. write_xmm_reg128(r, result);
  2227. }
  2228. DEFINE_SSE_SPLIT(instr_660FD8, safe_read128s, read_xmm128s)
  2229. void instr_0FD9(union reg64 source, int32_t r) {
  2230. // psubusw mm, mm/m64
  2231. task_switch_test_mmx();
  2232. union reg64 destination = read_mmx64s(r);
  2233. int32_t word0 = saturate_uw(destination.u16[0] - source.u16[0]);
  2234. int32_t word1 = saturate_uw(destination.u16[1] - source.u16[1]);
  2235. int32_t word2 = saturate_uw(destination.u16[2] - source.u16[2]);
  2236. int32_t word3 = saturate_uw(destination.u16[3] - source.u16[3]);
  2237. int32_t low = word0 | word1 << 16;
  2238. int32_t high = word2 | word3 << 16;
  2239. write_mmx64(r, low, high);
  2240. }
  2241. DEFINE_SSE_SPLIT(instr_0FD9, safe_read64s, read_mmx64s)
  2242. void instr_660FD9(union reg128 source, int32_t r) {
  2243. // psubusw xmm, xmm/m128
  2244. task_switch_test_mmx();
  2245. union reg128 destination = read_xmm128s(r);
  2246. union reg128 result;
  2247. for(uint32_t i = 0; i < 8; i++)
  2248. {
  2249. result.u16[i] = saturate_uw(destination.u16[i] - source.u16[i]);
  2250. }
  2251. write_xmm_reg128(r, result);
  2252. }
  2253. DEFINE_SSE_SPLIT(instr_660FD9, safe_read128s, read_xmm128s)
  2254. void instr_0FDA(union reg64 source, int32_t r) {
  2255. // pminub mm, mm/m64
  2256. task_switch_test_mmx();
  2257. union reg64 destination = read_mmx64s(r);
  2258. union reg64 result;
  2259. for(uint32_t i = 0; i < 8; i++)
  2260. {
  2261. result.u8[i] = source.u8[i] < destination.u8[i] ? source.u8[i] : destination.u8[i];
  2262. }
  2263. write_mmx_reg64(r, result);
  2264. }
  2265. DEFINE_SSE_SPLIT(instr_0FDA, safe_read64s, read_mmx64s)
  2266. void instr_660FDA(union reg128 source, int32_t r) {
  2267. // pminub xmm, xmm/m128
  2268. // XXX: Aligned access or #gp
  2269. task_switch_test_mmx();
  2270. union reg128 destination = read_xmm128s(r);
  2271. union reg128 result;
  2272. for(uint32_t i = 0; i < 16; i++)
  2273. {
  2274. result.u8[i] = source.u8[i] < destination.u8[i] ? source.u8[i] : destination.u8[i];
  2275. }
  2276. write_xmm_reg128(r, result);
  2277. }
  2278. DEFINE_SSE_SPLIT(instr_660FDA, safe_read128s, read_xmm128s)
  2279. void instr_0FDB(union reg64 source, int32_t r) {
  2280. // pand mm, mm/m64
  2281. task_switch_test_mmx();
  2282. union reg64 destination = read_mmx64s(r);
  2283. union reg64 result = { { 0 } };
  2284. result.u64[0] = source.u64[0] & destination.u64[0];
  2285. write_mmx_reg64(r, result);
  2286. }
  2287. DEFINE_SSE_SPLIT(instr_0FDB, safe_read64s, read_mmx64s)
  2288. void instr_660FDB(union reg128 source, int32_t r) {
  2289. // pand xmm, xmm/m128
  2290. // XXX: Aligned access or #gp
  2291. pand_r128(source, r);
  2292. }
  2293. DEFINE_SSE_SPLIT(instr_660FDB, safe_read128s, read_xmm128s)
  2294. void instr_0FDC(union reg64 source, int32_t r) {
  2295. // paddusb mm, mm/m64
  2296. task_switch_test_mmx();
  2297. union reg64 destination = read_mmx64s(r);
  2298. union reg64 result = { { 0 } };
  2299. for(uint32_t i = 0; i < 8; i++)
  2300. {
  2301. result.u8[i] = saturate_ud_to_ub(destination.u8[i] + source.u8[i]);
  2302. }
  2303. write_mmx_reg64(r, result);
  2304. }
  2305. DEFINE_SSE_SPLIT(instr_0FDC, safe_read64s, read_mmx64s)
  2306. void instr_660FDC(union reg128 source, int32_t r) {
  2307. // paddusb xmm, xmm/m128
  2308. // XXX: Aligned access or #gp
  2309. task_switch_test_mmx();
  2310. union reg128 destination = read_xmm128s(r);
  2311. union reg128 result;
  2312. for(uint32_t i = 0; i < 16; i++)
  2313. {
  2314. result.u8[i] = saturate_ud_to_ub(source.u8[i] + destination.u8[i]);
  2315. }
  2316. write_xmm_reg128(r, result);
  2317. }
  2318. DEFINE_SSE_SPLIT(instr_660FDC, safe_read128s, read_xmm128s)
  2319. void instr_0FDD(union reg64 source, int32_t r) {
  2320. // paddusw mm, mm/m64
  2321. task_switch_test_mmx();
  2322. union reg64 destination = read_mmx64s(r);
  2323. int32_t word0 = saturate_uw(destination.u16[0] + source.u16[0]);
  2324. int32_t word1 = saturate_uw(destination.u16[1] + source.u16[1]);
  2325. int32_t word2 = saturate_uw(destination.u16[2] + source.u16[2]);
  2326. int32_t word3 = saturate_uw(destination.u16[3] + source.u16[3]);
  2327. int32_t low = word0 | word1 << 16;
  2328. int32_t high = word2 | word3 << 16;
  2329. write_mmx64(r, low, high);
  2330. }
  2331. DEFINE_SSE_SPLIT(instr_0FDD, safe_read64s, read_mmx64s)
  2332. void instr_660FDD(union reg128 source, int32_t r) {
  2333. // paddusw xmm, xmm/m128
  2334. // XXX: Aligned access or #gp
  2335. task_switch_test_mmx();
  2336. union reg128 destination = read_xmm128s(r);
  2337. write_xmm128(
  2338. r,
  2339. saturate_uw(source.u16[0] + destination.u16[0]) | saturate_uw(source.u16[1] + destination.u16[1]) << 16,
  2340. saturate_uw(source.u16[2] + destination.u16[2]) | saturate_uw(source.u16[3] + destination.u16[3]) << 16,
  2341. saturate_uw(source.u16[4] + destination.u16[4]) | saturate_uw(source.u16[5] + destination.u16[5]) << 16,
  2342. saturate_uw(source.u16[6] + destination.u16[6]) | saturate_uw(source.u16[7] + destination.u16[7]) << 16
  2343. );
  2344. }
  2345. DEFINE_SSE_SPLIT(instr_660FDD, safe_read128s, read_xmm128s)
  2346. void instr_0FDE(union reg64 source, int32_t r) {
  2347. // pmaxub mm, mm/m64
  2348. task_switch_test_mmx();
  2349. union reg64 destination = read_mmx64s(r);
  2350. union reg64 result;
  2351. for(uint32_t i = 0; i < 8; i++)
  2352. {
  2353. result.u8[i] = source.u8[i] > destination.u8[i] ? source.u8[i] : destination.u8[i];
  2354. }
  2355. write_mmx_reg64(r, result);
  2356. }
  2357. DEFINE_SSE_SPLIT(instr_0FDE, safe_read64s, read_mmx64s)
  2358. void instr_660FDE(union reg128 source, int32_t r) {
  2359. // pmaxub xmm, xmm/m128
  2360. // XXX: Aligned access or #gp
  2361. task_switch_test_mmx();
  2362. union reg128 destination = read_xmm128s(r);
  2363. union reg128 result;
  2364. for(uint32_t i = 0; i < 16; i++)
  2365. {
  2366. result.u8[i] = source.u8[i] > destination.u8[i] ? source.u8[i] : destination.u8[i];
  2367. }
  2368. write_xmm_reg128(r, result);
  2369. }
  2370. DEFINE_SSE_SPLIT(instr_660FDE, safe_read128s, read_xmm128s)
  2371. void instr_0FDF(union reg64 source, int32_t r) {
  2372. // pandn mm, mm/m64
  2373. task_switch_test_mmx();
  2374. union reg64 destination = read_mmx64s(r);
  2375. union reg64 result = { { 0 } };
  2376. result.u64[0] = source.u64[0] & ~destination.u64[0];
  2377. write_mmx_reg64(r, result);
  2378. }
  2379. DEFINE_SSE_SPLIT(instr_0FDF, safe_read64s, read_mmx64s)
  2380. void instr_660FDF(union reg128 source, int32_t r) {
  2381. // pandn xmm, xmm/m128
  2382. // XXX: Aligned access or #gp
  2383. pandn_r128(source, r);
  2384. }
  2385. DEFINE_SSE_SPLIT(instr_660FDF, safe_read128s, read_xmm128s)
  2386. void instr_0FE0(union reg64 source, int32_t r) {
  2387. // pavgb mm, mm/m64
  2388. task_switch_test_mmx();
  2389. union reg64 destination = read_mmx64s(r);
  2390. union reg64 result = { { 0 } };
  2391. for(uint32_t i = 0; i < 8; i++)
  2392. {
  2393. result.u8[i] = (destination.u8[i] + source.u8[i] + 1) >> 1;
  2394. }
  2395. write_mmx_reg64(r, result);
  2396. }
  2397. DEFINE_SSE_SPLIT(instr_0FE0, safe_read64s, read_mmx64s)
  2398. void instr_660FE0(union reg128 source, int32_t r) {
  2399. // pavgb xmm, xmm/m128
  2400. // XXX: Aligned access or #gp
  2401. task_switch_test_mmx();
  2402. union reg128 destination = read_xmm128s(r);
  2403. union reg128 result;
  2404. for(uint32_t i = 0; i < 16; i++)
  2405. {
  2406. result.u8[i] = (destination.u8[i] + source.u8[i] + 1) >> 1;
  2407. }
  2408. write_xmm_reg128(r, result);
  2409. }
  2410. DEFINE_SSE_SPLIT(instr_660FE0, safe_read128s, read_xmm128s)
  2411. void instr_0FE1(union reg64 source, int32_t r) {
  2412. // psraw mm, mm/m64
  2413. psraw_r64(r, source.u32[0]);
  2414. }
  2415. DEFINE_SSE_SPLIT(instr_0FE1, safe_read64s, read_mmx64s)
  2416. void instr_660FE1(union reg128 source, int32_t r) {
  2417. // psraw xmm, xmm/m128
  2418. // XXX: Aligned access or #gp
  2419. psraw_r128(r, source.u32[0]);
  2420. }
  2421. DEFINE_SSE_SPLIT(instr_660FE1, safe_read128s, read_xmm128s)
  2422. void instr_0FE2(union reg64 source, int32_t r) {
  2423. // psrad mm, mm/m64
  2424. psrad_r64(r, source.u32[0]);
  2425. }
  2426. DEFINE_SSE_SPLIT(instr_0FE2, safe_read64s, read_mmx64s)
  2427. void instr_660FE2(union reg128 source, int32_t r) {
  2428. // psrad xmm, xmm/m128
  2429. // XXX: Aligned access or #gp
  2430. psrad_r128(r, source.u32[0]);
  2431. }
  2432. DEFINE_SSE_SPLIT(instr_660FE2, safe_read128s, read_xmm128s)
  2433. void instr_0FE3(union reg64 source, int32_t r) {
  2434. // pavgw mm, mm/m64
  2435. task_switch_test_mmx();
  2436. union reg64 destination = read_mmx64s(r);
  2437. destination.u16[0] = (destination.u16[0] + source.u16[0] + 1) >> 1;
  2438. destination.u16[1] = (destination.u16[1] + source.u16[1] + 1) >> 1;
  2439. destination.u16[2] = (destination.u16[2] + source.u16[2] + 1) >> 1;
  2440. destination.u16[3] = (destination.u16[3] + source.u16[3] + 1) >> 1;
  2441. write_mmx_reg64(r, destination);
  2442. }
  2443. DEFINE_SSE_SPLIT(instr_0FE3, safe_read64s, read_mmx64s)
  2444. void instr_660FE3(union reg128 source, int32_t r) {
  2445. // pavgw xmm, xmm/m128
  2446. // XXX: Aligned access or #gp
  2447. task_switch_test_mmx();
  2448. union reg128 destination = read_xmm128s(r);
  2449. destination.u16[0] = (destination.u16[0] + source.u16[0] + 1) >> 1;
  2450. destination.u16[1] = (destination.u16[1] + source.u16[1] + 1) >> 1;
  2451. destination.u16[2] = (destination.u16[2] + source.u16[2] + 1) >> 1;
  2452. destination.u16[3] = (destination.u16[3] + source.u16[3] + 1) >> 1;
  2453. destination.u16[4] = (destination.u16[4] + source.u16[4] + 1) >> 1;
  2454. destination.u16[5] = (destination.u16[5] + source.u16[5] + 1) >> 1;
  2455. destination.u16[6] = (destination.u16[6] + source.u16[6] + 1) >> 1;
  2456. destination.u16[7] = (destination.u16[7] + source.u16[7] + 1) >> 1;
  2457. write_xmm_reg128(r, destination);
  2458. }
  2459. DEFINE_SSE_SPLIT(instr_660FE3, safe_read128s, read_xmm128s)
  2460. void instr_0FE4(union reg64 source, int32_t r) {
  2461. // pmulhuw mm, mm/m64
  2462. task_switch_test_mmx();
  2463. union reg64 destination = read_mmx64s(r);
  2464. write_mmx64(
  2465. r,
  2466. (source.u16[0] * destination.u16[0] >> 16) & 0xFFFF | source.u16[1] * destination.u16[1] & 0xFFFF0000,
  2467. (source.u16[2] * destination.u16[2] >> 16) & 0xFFFF | source.u16[3] * destination.u16[3] & 0xFFFF0000
  2468. );
  2469. }
  2470. DEFINE_SSE_SPLIT(instr_0FE4, safe_read64s, read_mmx64s)
  2471. void instr_660FE4(union reg128 source, int32_t r) {
  2472. // pmulhuw xmm, xmm/m128
  2473. // XXX: Aligned access or #gp
  2474. task_switch_test_mmx();
  2475. union reg128 destination = read_xmm128s(r);
  2476. write_xmm128(
  2477. r,
  2478. (source.u16[0] * destination.u16[0] >> 16) & 0xFFFF | source.u16[1] * destination.u16[1] & 0xFFFF0000,
  2479. (source.u16[2] * destination.u16[2] >> 16) & 0xFFFF | source.u16[3] * destination.u16[3] & 0xFFFF0000,
  2480. (source.u16[4] * destination.u16[4] >> 16) & 0xFFFF | source.u16[5] * destination.u16[5] & 0xFFFF0000,
  2481. (source.u16[6] * destination.u16[6] >> 16) & 0xFFFF | source.u16[7] * destination.u16[7] & 0xFFFF0000
  2482. );
  2483. }
  2484. DEFINE_SSE_SPLIT(instr_660FE4, safe_read128s, read_xmm128s)
  2485. void instr_0FE5(union reg64 source, int32_t r) {
  2486. // pmulhw mm, mm/m64
  2487. task_switch_test_mmx();
  2488. union reg64 destination = read_mmx64s(r);
  2489. uint32_t word0 = ((destination.i16[0] * source.i16[0]) >> 16) & 0xFFFF;
  2490. uint32_t word1 = ((destination.i16[1] * source.i16[1]) >> 16) & 0xFFFF;
  2491. uint32_t word2 = ((destination.i16[2] * source.i16[2]) >> 16) & 0xFFFF;
  2492. uint32_t word3 = ((destination.i16[3] * source.i16[3]) >> 16) & 0xFFFF;
  2493. int32_t low = word0 | (word1 << 16);
  2494. int32_t high = word2 | (word3 << 16);
  2495. write_mmx64(r, low, high);
  2496. }
  2497. DEFINE_SSE_SPLIT(instr_0FE5, safe_read64s, read_mmx64s)
  2498. void instr_660FE5(union reg128 source, int32_t r) {
  2499. // pmulhw xmm, xmm/m128
  2500. // XXX: Aligned access or #gp
  2501. task_switch_test_mmx();
  2502. union reg128 destination = read_xmm128s(r);
  2503. int32_t dword0 = ((destination.i16[0] * source.i16[0]) >> 16) & 0xFFFF |
  2504. ((destination.i16[1] * source.i16[1]) & 0xFFFF0000);
  2505. int32_t dword1 = ((destination.i16[2] * source.i16[2]) >> 16) & 0xFFFF |
  2506. ((destination.i16[3] * source.i16[3]) & 0xFFFF0000);
  2507. int32_t dword2 = ((destination.i16[4] * source.i16[4]) >> 16) & 0xFFFF |
  2508. ((destination.i16[5] * source.i16[5]) & 0xFFFF0000);
  2509. int32_t dword3 = ((destination.i16[6] * source.i16[6]) >> 16) & 0xFFFF |
  2510. ((destination.i16[7] * source.i16[7]) & 0xFFFF0000);
  2511. write_xmm128(r, dword0, dword1, dword2, dword3);
  2512. }
  2513. DEFINE_SSE_SPLIT(instr_660FE5, safe_read128s, read_xmm128s)
  // 0F E6 without a prefix: both operand forms raise #UD.
  void instr_0FE6_mem(int32_t addr, int32_t r) {
      trigger_ud();
  }
  void instr_0FE6_reg(int32_t r1, int32_t r2) {
      trigger_ud();
  }

  // 66 0F E6 (cvttpd2dq in the Intel opcode map): not implemented.
  void instr_660FE6_mem(int32_t addr, int32_t r) {
      unimplemented_sse();
  }
  void instr_660FE6_reg(int32_t r1, int32_t r2) {
      unimplemented_sse();
  }

  // F2 0F E6 (cvtpd2dq in the Intel opcode map): not implemented.
  void instr_F20FE6_mem(int32_t addr, int32_t r) {
      unimplemented_sse();
  }
  void instr_F20FE6_reg(int32_t r1, int32_t r2) {
      unimplemented_sse();
  }

  // F3 0F E6 (cvtdq2pd in the Intel opcode map): not implemented.
  void instr_F30FE6_mem(int32_t addr, int32_t r) {
      unimplemented_sse();
  }
  void instr_F30FE6_reg(int32_t r1, int32_t r2) {
      unimplemented_sse();
  }
  void instr_0FE7_mem(int32_t addr, int32_t r) {
      // movntq m64, mm
      // Handled by the generic 64-bit register-to-memory move helper.
      mov_r_m64(addr, r);
  }

  // The register-operand form of 0F E7 raises #UD.
  void instr_0FE7_reg(int32_t r1, int32_t r2) {
      trigger_ud();
  }
  // The register-operand form of 66 0F E7 raises #UD.
  void instr_660FE7_reg(int32_t r1, int32_t r2) {
      trigger_ud();
  }

  void instr_660FE7_mem(int32_t addr, int32_t r) {
      // movntdq m128, xmm
      // Handled by the generic 128-bit register-to-memory move helper.
      mov_r_m128(addr, r);
  }
  2532. void instr_0FE8(union reg64 source, int32_t r) {
  2533. // psubsb mm, mm/m64
  2534. task_switch_test_mmx();
  2535. union reg64 destination = read_mmx64s(r);
  2536. union reg64 result = { { 0 } };
  2537. for(uint32_t i = 0; i < 8; i++)
  2538. {
  2539. result.u8[i] = saturate_sd_to_sb(destination.i8[i] - source.i8[i]);
  2540. }
  2541. write_mmx_reg64(r, result);
  2542. }
  2543. DEFINE_SSE_SPLIT(instr_0FE8, safe_read64s, read_mmx64s)
  2544. void instr_660FE8(union reg128 source, int32_t r) {
  2545. // psubsb xmm, xmm/m128
  2546. // XXX: Aligned access or #gp
  2547. task_switch_test_mmx();
  2548. union reg128 destination = read_xmm128s(r);
  2549. union reg128 result;
  2550. for(uint32_t i = 0; i < 16; i++)
  2551. {
  2552. result.i8[i] = saturate_sd_to_sb(destination.i8[i] - source.i8[i]);
  2553. }
  2554. write_xmm_reg128(r, result);
  2555. }
  2556. DEFINE_SSE_SPLIT(instr_660FE8, safe_read128s, read_xmm128s)
  2557. void instr_0FE9(union reg64 source, int32_t r) {
  2558. // psubsw mm, mm/m64
  2559. task_switch_test_mmx();
  2560. union reg64 destination = read_mmx64s(r);
  2561. int32_t word0 = saturate_sd_to_sw(destination.i16[0] - source.i16[0]);
  2562. int32_t word1 = saturate_sd_to_sw(destination.i16[1] - source.i16[1]);
  2563. int32_t word2 = saturate_sd_to_sw(destination.i16[2] - source.i16[2]);
  2564. int32_t word3 = saturate_sd_to_sw(destination.i16[3] - source.i16[3]);
  2565. int32_t low = word0 | word1 << 16;
  2566. int32_t high = word2 | word3 << 16;
  2567. write_mmx64(r, low, high);
  2568. }
  2569. DEFINE_SSE_SPLIT(instr_0FE9, safe_read64s, read_mmx64s)
  2570. void instr_660FE9(union reg128 source, int32_t r) {
  2571. // psubsw xmm, xmm/m128
  2572. // XXX: Aligned access or #gp
  2573. task_switch_test_mmx();
  2574. union reg128 destination = read_xmm128s(r);
  2575. int32_t dword0 = saturate_sd_to_sw(destination.i16[0] - source.i16[0]) |
  2576. saturate_sd_to_sw(destination.i16[1] - source.i16[1]) << 16;
  2577. int32_t dword1 = saturate_sd_to_sw(destination.i16[2] - source.i16[2]) |
  2578. saturate_sd_to_sw(destination.i16[3] - source.i16[3]) << 16;
  2579. int32_t dword2 = saturate_sd_to_sw(destination.i16[4] - source.i16[4]) |
  2580. saturate_sd_to_sw(destination.i16[5] - source.i16[5]) << 16;
  2581. int32_t dword3 = saturate_sd_to_sw(destination.i16[6] - source.i16[6]) |
  2582. saturate_sd_to_sw(destination.i16[7] - source.i16[7]) << 16;
  2583. write_xmm128(r, dword0, dword1, dword2, dword3);
  2584. }
  2585. DEFINE_SSE_SPLIT(instr_660FE9, safe_read128s, read_xmm128s)
  2586. void instr_0FEA(union reg64 source, int32_t r) {
  2587. // pminsw mm, mm/m64
  2588. task_switch_test_mmx();
  2589. union reg64 destination = read_mmx64s(r);
  2590. union reg64 result;
  2591. for(uint32_t i = 0; i < 4; i++)
  2592. {
  2593. result.i16[i] = destination.i16[i] < source.i16[i] ? destination.i16[i] : source.i16[i];
  2594. }
  2595. write_mmx_reg64(r, result);
  2596. }
  2597. DEFINE_SSE_SPLIT(instr_0FEA, safe_read64s, read_mmx64s)
  2598. void instr_660FEA(union reg128 source, int32_t r) {
  2599. // pminsw xmm, xmm/m128
  2600. // XXX: Aligned access or #gp
  2601. task_switch_test_mmx();
  2602. union reg128 destination = read_xmm128s(r);
  2603. union reg128 result;
  2604. for(uint32_t i = 0; i < 8; i++)
  2605. {
  2606. result.i16[i] = destination.i16[i] < source.i16[i] ? destination.i16[i] : source.i16[i];
  2607. }
  2608. write_xmm_reg128(r, result);
  2609. }
  2610. DEFINE_SSE_SPLIT(instr_660FEA, safe_read128s, read_xmm128s)
  2611. void instr_0FEB(union reg64 source, int32_t r) {
  2612. // por mm, mm/m64
  2613. task_switch_test_mmx();
  2614. union reg64 destination = read_mmx64s(r);
  2615. union reg64 result = { { 0 } };
  2616. result.u64[0] = source.u64[0] | destination.u64[0];
  2617. write_mmx_reg64(r, result);
  2618. }
  2619. DEFINE_SSE_SPLIT(instr_0FEB, safe_read64s, read_mmx64s)
  2620. void instr_660FEB(union reg128 source, int32_t r) {
  2621. // por xmm, xmm/m128
  2622. // XXX: Aligned access or #gp
  2623. por_r128(source, r);
  2624. }
  2625. DEFINE_SSE_SPLIT(instr_660FEB, safe_read128s, read_xmm128s)
  2626. void instr_0FEC(union reg64 source, int32_t r) {
  2627. // paddsb mm, mm/m64
  2628. task_switch_test_mmx();
  2629. union reg64 destination = read_mmx64s(r);
  2630. union reg64 result = { { 0 } };
  2631. for(uint32_t i = 0; i < 8; i++)
  2632. {
  2633. result.u8[i] = saturate_sd_to_sb(destination.i8[i] + source.i8[i]);
  2634. }
  2635. write_mmx_reg64(r, result);
  2636. }
  2637. DEFINE_SSE_SPLIT(instr_0FEC, safe_read64s, read_mmx64s)
  2638. void instr_660FEC(union reg128 source, int32_t r) {
  2639. // paddsb xmm, xmm/m128
  2640. // XXX: Aligned access or #gp
  2641. task_switch_test_mmx();
  2642. union reg128 destination = read_xmm128s(r);
  2643. union reg128 result;
  2644. for(uint32_t i = 0; i < 16; i++)
  2645. {
  2646. result.i8[i] = saturate_sd_to_sb(destination.i8[i] + source.i8[i]);
  2647. }
  2648. write_xmm_reg128(r, result);
  2649. }
  2650. DEFINE_SSE_SPLIT(instr_660FEC, safe_read128s, read_xmm128s)
  2651. void instr_0FED(union reg64 source, int32_t r) {
  2652. // paddsw mm, mm/m64
  2653. task_switch_test_mmx();
  2654. union reg64 destination = read_mmx64s(r);
  2655. int32_t word0 = saturate_sd_to_sw(destination.i16[0] + source.i16[0]);
  2656. int32_t word1 = saturate_sd_to_sw(destination.i16[1] + source.i16[1]);
  2657. int32_t word2 = saturate_sd_to_sw(destination.i16[2] + source.i16[2]);
  2658. int32_t word3 = saturate_sd_to_sw(destination.i16[3] + source.i16[3]);
  2659. int32_t low = word0 | word1 << 16;
  2660. int32_t high = word2 | word3 << 16;
  2661. write_mmx64(r, low, high);
  2662. }
  2663. DEFINE_SSE_SPLIT(instr_0FED, safe_read64s, read_mmx64s)
  2664. void instr_660FED(union reg128 source, int32_t r) {
  2665. // paddsw xmm, xmm/m128
  2666. // XXX: Aligned access or #gp
  2667. task_switch_test_mmx();
  2668. union reg128 destination = read_xmm128s(r);
  2669. int32_t dword0 = saturate_sd_to_sw(destination.i16[0] + source.i16[0]) |
  2670. saturate_sd_to_sw(destination.i16[1] + source.i16[1]) << 16;
  2671. int32_t dword1 = saturate_sd_to_sw(destination.i16[2] + source.i16[2]) |
  2672. saturate_sd_to_sw(destination.i16[3] + source.i16[3]) << 16;
  2673. int32_t dword2 = saturate_sd_to_sw(destination.i16[4] + source.i16[4]) |
  2674. saturate_sd_to_sw(destination.i16[5] + source.i16[5]) << 16;
  2675. int32_t dword3 = saturate_sd_to_sw(destination.i16[6] + source.i16[6]) |
  2676. saturate_sd_to_sw(destination.i16[7] + source.i16[7]) << 16;
  2677. write_xmm128(r, dword0, dword1, dword2, dword3);
  2678. }
  2679. DEFINE_SSE_SPLIT(instr_660FED, safe_read128s, read_xmm128s)
  2680. void instr_0FEE(union reg64 source, int32_t r) {
  2681. // pmaxsw mm, mm/m64
  2682. task_switch_test_mmx();
  2683. union reg64 destination = read_mmx64s(r);
  2684. union reg64 result;
  2685. for(uint32_t i = 0; i < 4; i++)
  2686. {
  2687. result.i16[i] = destination.i16[i] >= source.i16[i] ? destination.i16[i] : source.i16[i];
  2688. }
  2689. write_mmx_reg64(r, result);
  2690. }
  2691. DEFINE_SSE_SPLIT(instr_0FEE, safe_read64s, read_mmx64s)
  2692. void instr_660FEE(union reg128 source, int32_t r) {
  2693. // pmaxsw xmm, xmm/m128
  2694. // XXX: Aligned access or #gp
  2695. task_switch_test_mmx();
  2696. union reg128 destination = read_xmm128s(r);
  2697. union reg128 result;
  2698. for(uint32_t i = 0; i < 8; i++)
  2699. {
  2700. result.i16[i] = destination.i16[i] >= source.i16[i] ? destination.i16[i] : source.i16[i];
  2701. }
  2702. write_xmm_reg128(r, result);
  2703. }
  2704. DEFINE_SSE_SPLIT(instr_660FEE, safe_read128s, read_xmm128s)
  2705. void instr_0FEF(union reg64 source, int32_t r) {
  2706. // pxor mm, mm/m64
  2707. task_switch_test_mmx();
  2708. union reg64 destination = read_mmx64s(r);
  2709. union reg64 result = { { 0 } };
  2710. result.u64[0] = source.u64[0] ^ destination.u64[0];
  2711. write_mmx_reg64(r, result);
  2712. }
  2713. DEFINE_SSE_SPLIT(instr_0FEF, safe_read64s, read_mmx64s)
  2714. void instr_660FEF(union reg128 source, int32_t r) {
  2715. // pxor xmm, xmm/m128
  2716. // XXX: Aligned access or #gp
  2717. pxor_r128(source, r);
  2718. }
  2719. DEFINE_SSE_SPLIT(instr_660FEF, safe_read128s, read_xmm128s)
  // 0F F0 is not implemented; reports through unimplemented_sse().
  void instr_0FF0() {
      unimplemented_sse();
  }
  2721. void instr_0FF1(union reg64 source, int32_t r) {
  2722. // psllw mm, mm/m64
  2723. psllw_r64(r, source.u32[0]);
  2724. }
  2725. DEFINE_SSE_SPLIT(instr_0FF1, safe_read64s, read_mmx64s)
  2726. void instr_660FF1(union reg128 source, int32_t r) {
  2727. // psllw xmm, xmm/m128
  2728. // XXX: Aligned access or #gp
  2729. psllw_r128(r, source.u32[0]);
  2730. }
  2731. DEFINE_SSE_SPLIT(instr_660FF1, safe_read128s, read_xmm128s)
  2732. void instr_0FF2(union reg64 source, int32_t r) {
  2733. // pslld mm, mm/m64
  2734. pslld_r64(r, source.u32[0]);
  2735. }
  2736. DEFINE_SSE_SPLIT(instr_0FF2, safe_read64s, read_mmx64s)
  2737. void instr_660FF2(union reg128 source, int32_t r) {
  2738. // pslld xmm, xmm/m128
  2739. // XXX: Aligned access or #gp
  2740. pslld_r128(r, source.u32[0]);
  2741. }
  2742. DEFINE_SSE_SPLIT(instr_660FF2, safe_read128s, read_xmm128s)
  2743. void instr_0FF3(union reg64 source, int32_t r) {
  2744. // psllq mm, mm/m64
  2745. psllq_r64(r, source.u32[0]);
  2746. }
  2747. DEFINE_SSE_SPLIT(instr_0FF3, safe_read64s, read_mmx64s)
  2748. void instr_660FF3(union reg128 source, int32_t r) {
  2749. // psllq xmm, xmm/m128
  2750. // XXX: Aligned access or #gp
  2751. psllq_r128(r, source.u32[0]);
  2752. }
  2753. DEFINE_SSE_SPLIT(instr_660FF3, safe_read128s, read_xmm128s)
  2754. void instr_0FF4(union reg64 source, int32_t r) {
  2755. // pmuludq mm, mm/m64
  2756. task_switch_test_mmx();
  2757. union reg64 destination = read_mmx64s(r);
  2758. destination.u64[0] = (uint64_t) source.u32[0] * (uint64_t) destination.u32[0];
  2759. write_mmx_reg64(r, destination);
  2760. }
  2761. DEFINE_SSE_SPLIT(instr_0FF4, safe_read64s, read_mmx64s)
  2762. void instr_660FF4(union reg128 source, int32_t r) {
  2763. // pmuludq xmm, xmm/m128
  2764. // XXX: Aligned access or #gp
  2765. task_switch_test_mmx();
  2766. union reg128 destination = read_xmm128s(r);
  2767. destination.u64[0] = (uint64_t) source.u32[0] * (uint64_t) destination.u32[0];
  2768. destination.u64[1] = (uint64_t) source.u32[2] * (uint64_t) destination.u32[2];
  2769. write_xmm_reg128(r, destination);
  2770. }
  2771. DEFINE_SSE_SPLIT(instr_660FF4, safe_read128s, read_xmm128s)
  2772. void instr_0FF5(union reg64 source, int32_t r) {
  2773. // pmaddwd mm, mm/m64
  2774. task_switch_test_mmx();
  2775. union reg64 destination = read_mmx64s(r);
  2776. int32_t mul0 = destination.i16[0] * source.i16[0];
  2777. int32_t mul1 = destination.i16[1] * source.i16[1];
  2778. int32_t mul2 = destination.i16[2] * source.i16[2];
  2779. int32_t mul3 = destination.i16[3] * source.i16[3];
  2780. int32_t low = mul0 + mul1;
  2781. int32_t high = mul2 + mul3;
  2782. write_mmx64(r, low, high);
  2783. }
  2784. DEFINE_SSE_SPLIT(instr_0FF5, safe_read64s, read_mmx64s)
  2785. void instr_660FF5(union reg128 source, int32_t r) {
  2786. // pmaddwd xmm, xmm/m128
  2787. // XXX: Aligned access or #gp
  2788. task_switch_test_mmx();
  2789. union reg128 destination = read_xmm128s(r);
  2790. int32_t dword0 = (destination.i16[0] * source.i16[0]) +
  2791. (destination.i16[1] * source.i16[1]);
  2792. int32_t dword1 = (destination.i16[2] * source.i16[2]) +
  2793. (destination.i16[3] * source.i16[3]);
  2794. int32_t dword2 = (destination.i16[4] * source.i16[4]) +
  2795. (destination.i16[5] * source.i16[5]);
  2796. int32_t dword3 = (destination.i16[6] * source.i16[6]) +
  2797. (destination.i16[7] * source.i16[7]);
  2798. write_xmm128(r, dword0, dword1, dword2, dword3);
  2799. }
  2800. DEFINE_SSE_SPLIT(instr_660FF5, safe_read128s, read_xmm128s)
  2801. void instr_0FF6(union reg64 source, int32_t r) {
  2802. // psadbw mm, mm/m64
  2803. task_switch_test_mmx();
  2804. union reg64 destination = read_mmx64s(r);
  2805. uint32_t sum = 0;
  2806. for(uint32_t i = 0; i < 8; i++)
  2807. {
  2808. sum += abs(destination.u8[i] - source.u8[i]);
  2809. }
  2810. write_mmx64(r, sum, 0);
  2811. }
  2812. DEFINE_SSE_SPLIT(instr_0FF6, safe_read64s, read_mmx64s)
  2813. void instr_660FF6(union reg128 source, int32_t r) {
  2814. // psadbw xmm, xmm/m128
  2815. // XXX: Aligned access or #gp
  2816. task_switch_test_mmx();
  2817. union reg128 destination = read_xmm128s(r);
  2818. uint32_t sum0 = 0;
  2819. uint32_t sum1 = 0;
  2820. for(uint32_t i = 0; i < 8; i++)
  2821. {
  2822. sum0 += abs(destination.u8[i] - source.u8[i]);
  2823. sum1 += abs(destination.u8[i + 8] - source.u8[i + 8]);
  2824. }
  2825. write_xmm128(r, sum0, 0, sum1, 0);
  2826. }
  2827. DEFINE_SSE_SPLIT(instr_660FF6, safe_read128s, read_xmm128s)
  2828. void instr_0FF7_mem(int32_t addr, int32_t r) { trigger_ud(); }
  2829. void instr_0FF7_reg(int32_t r1, int32_t r2) {
  2830. // maskmovq mm, mm
  2831. task_switch_test_mmx();
  2832. union reg64 source = read_mmx64s(r2);
  2833. union reg64 mask = read_mmx64s(r1);
  2834. int32_t addr = get_seg_prefix(DS) + get_reg_asize(EDI);
  2835. writable_or_pagefault(addr, 8);
  2836. for(uint32_t i = 0; i < 8; i++)
  2837. {
  2838. if(mask.u8[i] & 0x80)
  2839. {
  2840. safe_write8(addr + i, source.u8[i]);
  2841. }
  2842. }
  2843. }
  2844. void instr_660FF7_mem(int32_t addr, int32_t r) { trigger_ud(); }
  2845. void instr_660FF7_reg(int32_t r1, int32_t r2) {
  2846. // maskmovdqu xmm, xmm
  2847. task_switch_test_mmx();
  2848. union reg128 source = read_xmm128s(r2);
  2849. union reg128 mask = read_xmm128s(r1);
  2850. int32_t addr = get_seg_prefix(DS) + get_reg_asize(EDI);
  2851. writable_or_pagefault(addr, 16);
  2852. for(uint32_t i = 0; i < 16; i++)
  2853. {
  2854. if(mask.u8[i] & 0x80)
  2855. {
  2856. safe_write8(addr + i, source.u8[i]);
  2857. }
  2858. }
  2859. }
  2860. void instr_0FF8(union reg64 source, int32_t r) {
  2861. // psubb mm, mm/m64
  2862. task_switch_test_mmx();
  2863. union reg64 destination = read_mmx64s(r);
  2864. union reg64 result = { { 0 } };
  2865. for(uint32_t i = 0; i < 8; i++)
  2866. {
  2867. result.u8[i] = (destination.i8[i] - source.i8[i]) & 0xFF;
  2868. }
  2869. write_mmx_reg64(r, result);
  2870. }
  2871. DEFINE_SSE_SPLIT(instr_0FF8, safe_read64s, read_mmx64s)
  2872. void instr_660FF8(union reg128 source, int32_t r) {
  2873. // psubb xmm, xmm/m128
  2874. // XXX: Aligned access or #gp
  2875. task_switch_test_mmx();
  2876. union reg128 destination = read_xmm128s(r);
  2877. union reg128 result = { { 0 } };
  2878. for(uint32_t i = 0; i < 16; i++)
  2879. {
  2880. result.i8[i] = (destination.i8[i] - source.i8[i]) & 0xFF;
  2881. }
  2882. write_xmm_reg128(r, result);
  2883. }
  2884. DEFINE_SSE_SPLIT(instr_660FF8, safe_read128s, read_xmm128s)
  2885. void instr_0FF9(union reg64 source, int32_t r) {
  2886. // psubw mm, mm/m64
  2887. task_switch_test_mmx();
  2888. union reg64 destination = read_mmx64s(r);
  2889. int32_t word0 = (destination.u32[0] - source.u32[0]) & 0xFFFF;
  2890. int32_t word1 = (((uint32_t) destination.u16[1]) - source.u16[1]) & 0xFFFF;
  2891. int32_t low = word0 | word1 << 16;
  2892. int32_t word2 = (destination.u32[1] - source.u32[1]) & 0xFFFF;
  2893. int32_t word3 = (((uint32_t) destination.u16[3]) - source.u16[3]) & 0xFFFF;
  2894. int32_t high = word2 | word3 << 16;
  2895. write_mmx64(r, low, high);
  2896. }
  2897. DEFINE_SSE_SPLIT(instr_0FF9, safe_read64s, read_mmx64s)
  2898. void instr_660FF9(union reg128 source, int32_t r) {
  2899. // psubw xmm, xmm/m128
  2900. // XXX: Aligned access or #gp
  2901. task_switch_test_mmx();
  2902. union reg128 destination = read_xmm128s(r);
  2903. union reg128 result = { { 0 } };
  2904. for(uint32_t i = 0; i < 8; i++)
  2905. {
  2906. result.i16[i] = (destination.i16[i] - source.i16[i]) & 0xFFFF;
  2907. }
  2908. write_xmm_reg128(r, result);
  2909. }
  2910. DEFINE_SSE_SPLIT(instr_660FF9, safe_read128s, read_xmm128s)
  2911. void instr_0FFA(union reg64 source, int32_t r) {
  2912. // psubd mm, mm/m64
  2913. task_switch_test_mmx();
  2914. union reg64 destination = read_mmx64s(r);
  2915. write_mmx64(
  2916. r,
  2917. destination.u32[0] - source.u32[0],
  2918. destination.u32[1] - source.u32[1]
  2919. );
  2920. }
  2921. DEFINE_SSE_SPLIT(instr_0FFA, safe_read64s, read_mmx64s)
  2922. void instr_660FFA(union reg128 source, int32_t r) {
  2923. // psubd xmm, xmm/m128
  2924. // XXX: Aligned access or #gp
  2925. task_switch_test_mmx();
  2926. union reg128 destination = read_xmm128s(r);
  2927. write_xmm128(
  2928. r,
  2929. destination.u32[0] - source.u32[0],
  2930. destination.u32[1] - source.u32[1],
  2931. destination.u32[2] - source.u32[2],
  2932. destination.u32[3] - source.u32[3]
  2933. );
  2934. }
  2935. DEFINE_SSE_SPLIT(instr_660FFA, safe_read128s, read_xmm128s)
  2936. void instr_0FFB(union reg64 source, int32_t r) {
  2937. // psubq mm, mm/m64
  2938. task_switch_test_mmx();
  2939. union reg64 destination = read_mmx64s(r);
  2940. destination.u64[0] = destination.u64[0] - source.u64[0];
  2941. write_mmx_reg64(r, destination);
  2942. }
  2943. DEFINE_SSE_SPLIT(instr_0FFB, safe_read64s, read_mmx64s)
  2944. void instr_660FFB(union reg128 source, int32_t r) {
  2945. // psubq xmm, xmm/m128
  2946. // XXX: Aligned access or #gp
  2947. task_switch_test_mmx();
  2948. union reg128 destination = read_xmm128s(r);
  2949. destination.u64[0] = destination.u64[0] - source.u64[0];
  2950. destination.u64[1] = destination.u64[1] - source.u64[1];
  2951. write_xmm_reg128(r, destination);
  2952. }
  2953. DEFINE_SSE_SPLIT(instr_660FFB, safe_read128s, read_xmm128s)
  2954. void instr_0FFC(union reg64 source, int32_t r) {
  2955. // paddb mm, mm/m64
  2956. task_switch_test_mmx();
  2957. union reg64 destination = read_mmx64s(r);
  2958. union reg64 result = { { 0 } };
  2959. for(uint32_t i = 0; i < 8; i++)
  2960. {
  2961. result.u8[i] = (destination.u8[i] + source.u8[i]) & 0xFF;
  2962. }
  2963. write_mmx_reg64(r, result);
  2964. }
  2965. DEFINE_SSE_SPLIT(instr_0FFC, safe_read64s, read_mmx64s)
  2966. void instr_660FFC(union reg128 source, int32_t r) {
  2967. // paddb xmm, xmm/m128
  2968. // XXX: Aligned access or #gp
  2969. task_switch_test_mmx();
  2970. union reg128 destination = read_xmm128s(r);
  2971. union reg128 result = { { 0 } };
  2972. for(uint32_t i = 0; i < 16; i++)
  2973. {
  2974. result.u8[i] = (destination.u8[i] + source.u8[i]) & 0xFF;
  2975. }
  2976. write_xmm_reg128(r, result);
  2977. }
  2978. DEFINE_SSE_SPLIT(instr_660FFC, safe_read128s, read_xmm128s)
  2979. void instr_0FFD(union reg64 source, int32_t r) {
  2980. // paddw mm, mm/m64
  2981. task_switch_test_mmx();
  2982. union reg64 destination = read_mmx64s(r);
  2983. int32_t word0 = (destination.u32[0] + source.u32[0]) & 0xFFFF;
  2984. int32_t word1 = (destination.u16[1] + source.u16[1]) & 0xFFFF;
  2985. int32_t low = word0 | word1 << 16;
  2986. int32_t word2 = (destination.u32[1] + source.u32[1]) & 0xFFFF;
  2987. int32_t word3 = (destination.u16[3] + source.u16[3]) & 0xFFFF;
  2988. int32_t high = word2 | word3 << 16;
  2989. write_mmx64(r, low, high);
  2990. }
  2991. DEFINE_SSE_SPLIT(instr_0FFD, safe_read64s, read_mmx64s)
  2992. void instr_660FFD(union reg128 source, int32_t r) {
  2993. // paddw xmm, xmm/m128
  2994. // XXX: Aligned access or #gp
  2995. task_switch_test_mmx();
  2996. union reg128 destination = read_xmm128s(r);
  2997. union reg128 result = { { 0 } };
  2998. for(uint32_t i = 0; i < 8; i++)
  2999. {
  3000. result.u16[i] = (destination.u16[i] + source.u16[i]) & 0xFFFF;
  3001. }
  3002. write_xmm_reg128(r, result);
  3003. }
  3004. DEFINE_SSE_SPLIT(instr_660FFD, safe_read128s, read_xmm128s)
  3005. void instr_0FFE(union reg64 source, int32_t r) {
  3006. // paddd mm, mm/m64
  3007. task_switch_test_mmx();
  3008. union reg64 destination = read_mmx64s(r);
  3009. int32_t low = destination.u32[0] + source.u32[0];
  3010. int32_t high = destination.u32[1] + source.u32[1];
  3011. write_mmx64(r, low, high);
  3012. }
  3013. DEFINE_SSE_SPLIT(instr_0FFE, safe_read64s, read_mmx64s)
  3014. void instr_660FFE(union reg128 source, int32_t r) {
  3015. // paddd xmm, xmm/m128
  3016. // XXX: Aligned access or #gp
  3017. task_switch_test_mmx();
  3018. union reg128 destination = read_xmm128s(r);
  3019. int32_t dword0 = destination.u32[0] + source.u32[0];
  3020. int32_t dword1 = destination.u32[1] + source.u32[1];
  3021. int32_t dword2 = destination.u32[2] + source.u32[2];
  3022. int32_t dword3 = destination.u32[3] + source.u32[3];
  3023. write_xmm128(r, dword0, dword1, dword2, dword3);
  3024. }
  3025. DEFINE_SSE_SPLIT(instr_660FFE, safe_read128s, read_xmm128s)
  // 0F FF: logs the occurrence, then raises #UD.
  // The original note on this handler reads "Windows 98".
  void instr_0FFF() {
      dbg_log("#ud: 0F FF");

      trigger_ud();
  }
  3031. void run_instruction0f_16(int32_t opcode)
  3032. {
  3033. #include "../../build/interpreter0f_16.c"
  3034. }
  3035. void run_instruction0f_32(int32_t opcode)
  3036. {
  3037. #include "../../build/interpreter0f_32.c"
  3038. }
  3039. #pragma clang diagnostic pop