// sse_instr.rs

use cpu::cpu::*;
use cpu::global_pointers::mxcsr;

#[no_mangle]
pub unsafe fn mov_r_m64(addr: i32, r: i32) {
    // mov* m64, mm
    let data = read_mmx64s(r);
    return_on_pagefault!(safe_write64(addr, data));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn movl_r128_m64(addr: i32, r: i32) {
    // mov* m64, xmm
    let data = read_xmm64s(r);
    return_on_pagefault!(safe_write64(addr, data));
}
#[no_mangle]
pub unsafe fn mov_r_r128(r1: i32, r2: i32) {
    // mov* xmm, xmm
    let data = read_xmm128s(r2);
    write_xmm_reg128(r1, data);
}
#[no_mangle]
pub unsafe fn mov_r_m128(addr: i32, r: i32) {
    // mov* m128, xmm
    let data = read_xmm128s(r);
    return_on_pagefault!(safe_write128(addr, data));
}
#[no_mangle]
pub unsafe fn mov_rm_r128(source: reg128, r: i32) {
    // mov* xmm, xmm/m128
    write_xmm_reg128(r, source);
}
#[no_mangle]
pub unsafe fn movh_m64_r128(addr: i32, r: i32) {
    // movhp* xmm, m64
    let data = return_on_pagefault!(safe_read64s(addr));
    let orig = read_xmm128s(r);
    write_xmm128(
        r,
        orig.u32_0[0] as i32,
        orig.u32_0[1] as i32,
        data as i32,
        (data >> 32) as i32,
    );
}
#[no_mangle]
pub unsafe fn movh_r128_m64(addr: i32, r: i32) {
    // movhp* m64, xmm
    let data = read_xmm128s(r);
    return_on_pagefault!(safe_write64(addr, data.u64_0[1]));
}
#[no_mangle]
pub unsafe fn pand_r128(source: reg128, r: i32) {
    // pand xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8_0: [0; 16] };
    result.u64_0[0] = source.u64_0[0] & destination.u64_0[0];
    result.u64_0[1] = source.u64_0[1] & destination.u64_0[1];
    write_xmm_reg128(r, result);
}
#[no_mangle]
pub unsafe fn pandn_r128(source: reg128, r: i32) {
    // pandn xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8_0: [0; 16] };
    result.u64_0[0] = source.u64_0[0] & !destination.u64_0[0];
    result.u64_0[1] = source.u64_0[1] & !destination.u64_0[1];
    write_xmm_reg128(r, result);
}
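// Note the operand order in pandn_r128: x86 PANDN computes
// NOT(destination) AND source, not NOT(source) AND destination. A minimal
// sketch over one 64-bit lane (test module added for illustration only):
#[cfg(test)]
mod pandn_semantics_tests {
    #[test]
    fn pandn_negates_the_destination_operand() {
        let destination: u64 = 0xFF00FF00FF00FF00;
        let source: u64 = 0xFFFFFFFF00000000;
        assert_eq!(source & !destination, 0x00FF00FF00000000);
    }
}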
#[no_mangle]
pub unsafe fn pxor_r128(source: reg128, r: i32) {
    // pxor xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8_0: [0; 16] };
    result.u64_0[0] = source.u64_0[0] ^ destination.u64_0[0];
    result.u64_0[1] = source.u64_0[1] ^ destination.u64_0[1];
    write_xmm_reg128(r, result);
}
#[no_mangle]
pub unsafe fn por_r128(source: reg128, r: i32) {
    // por xmm, xmm/m128
    // XXX: Aligned access or #gp
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8_0: [0; 16] };
    result.u64_0[0] = source.u64_0[0] | destination.u64_0[0];
    result.u64_0[1] = source.u64_0[1] | destination.u64_0[1];
    write_xmm_reg128(r, result);
}
#[no_mangle]
pub unsafe fn psrlw_r64(r: i32, shift: u64) {
    // psrlw mm, {shift}
    let destination: [u16; 4] = std::mem::transmute(read_mmx64s(r));
    let shift = if shift > 15 { 16 } else { shift };
    let mut result = [0; 4];
    for i in 0..4 {
        result[i] = ((destination[i] as u32) >> shift) as u16
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psraw_r64(r: i32, shift: u64) {
    // psraw mm, {shift}
    let destination: [i16; 4] = std::mem::transmute(read_mmx64s(r));
    let shift = if shift > 15 { 16 } else { shift };
    let mut result = [0; 4];
    for i in 0..4 {
        result[i] = (destination[i] as i32 >> shift) as i16
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psllw_r64(r: i32, shift: u64) {
    // psllw mm, {shift}
    let destination: [i16; 4] = std::mem::transmute(read_mmx64s(r));
    let mut result = [0; 4];
    if shift <= 15 {
        for i in 0..4 {
            result[i] = destination[i] << shift
        }
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psrld_r64(r: i32, shift: u64) {
    // psrld mm, {shift}
    let destination: [u32; 2] = std::mem::transmute(read_mmx64s(r));
    let mut result = [0; 2];
    if shift <= 31 {
        for i in 0..2 {
            result[i] = destination[i] >> shift;
        }
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psrad_r64(r: i32, shift: u64) {
    // psrad mm, {shift}
    let destination: [i32; 2] = std::mem::transmute(read_mmx64s(r));
    let shift = if shift > 31 { 31 } else { shift };
    let mut result = [0; 2];
    for i in 0..2 {
        result[i] = destination[i] >> shift;
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn pslld_r64(r: i32, shift: u64) {
    // pslld mm, {shift}
    let destination: [i32; 2] = std::mem::transmute(read_mmx64s(r));
    let mut result = [0; 2];
    if shift <= 31 {
        for i in 0..2 {
            result[i] = destination[i] << shift;
        }
    }
    write_mmx_reg64(r, std::mem::transmute(result));
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psrlq_r64(r: i32, shift: u64) {
    // psrlq mm, {shift}
    let destination = read_mmx64s(r);
    let mut result = 0;
    if shift <= 63 {
        result = destination >> shift
    }
    write_mmx_reg64(r, result);
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psllq_r64(r: i32, shift: u64) {
    // psllq mm, {shift}
    let destination = read_mmx64s(r);
    let mut result = 0;
    if shift <= 63 {
        result = destination << shift
    }
    write_mmx_reg64(r, result);
    transition_fpu_to_mmx();
}
#[no_mangle]
pub unsafe fn psrlw_r128(r: i32, shift: u64) {
    // psrlw xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 15 {
        dword0 = destination.u16_0[0] as i32 >> shift | destination.u16_0[1] as i32 >> shift << 16;
        dword1 = destination.u16_0[2] as i32 >> shift | destination.u16_0[3] as i32 >> shift << 16;
        dword2 = destination.u16_0[4] as i32 >> shift | destination.u16_0[5] as i32 >> shift << 16;
        dword3 = destination.u16_0[6] as i32 >> shift | destination.u16_0[7] as i32 >> shift << 16
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
#[no_mangle]
pub unsafe fn psraw_r128(r: i32, shift: u64) {
    // psraw xmm, {shift}
    let destination = read_xmm128s(r);
    let shift_clamped = (if shift > 15 { 16 } else { shift as u32 }) as i32;
    let dword0 = destination.i16_0[0] as i32 >> shift_clamped & 0xFFFF
        | destination.i16_0[1] as i32 >> shift_clamped << 16;
    let dword1 = destination.i16_0[2] as i32 >> shift_clamped & 0xFFFF
        | destination.i16_0[3] as i32 >> shift_clamped << 16;
    let dword2 = destination.i16_0[4] as i32 >> shift_clamped & 0xFFFF
        | destination.i16_0[5] as i32 >> shift_clamped << 16;
    let dword3 = destination.i16_0[6] as i32 >> shift_clamped & 0xFFFF
        | destination.i16_0[7] as i32 >> shift_clamped << 16;
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
#[no_mangle]
pub unsafe fn psllw_r128(r: i32, shift: u64) {
    // psllw xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 15 {
        dword0 = (destination.u16_0[0] as i32) << shift & 0xFFFF
            | (destination.u16_0[1] as i32) << shift << 16;
        dword1 = (destination.u16_0[2] as i32) << shift & 0xFFFF
            | (destination.u16_0[3] as i32) << shift << 16;
        dword2 = (destination.u16_0[4] as i32) << shift & 0xFFFF
            | (destination.u16_0[5] as i32) << shift << 16;
        dword3 = (destination.u16_0[6] as i32) << shift & 0xFFFF
            | (destination.u16_0[7] as i32) << shift << 16
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
#[no_mangle]
pub unsafe fn psrld_r128(r: i32, shift: u64) {
    // psrld xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 31 {
        dword0 = (destination.u32_0[0] >> shift) as i32;
        dword1 = (destination.u32_0[1] >> shift) as i32;
        dword2 = (destination.u32_0[2] >> shift) as i32;
        dword3 = (destination.u32_0[3] >> shift) as i32
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
#[no_mangle]
pub unsafe fn psrad_r128(r: i32, shift: u64) {
    // psrad xmm, {shift}
    let destination = read_xmm128s(r);
    let shift_clamped = (if shift > 31 { 31 } else { shift }) as i32;
    let dword0 = destination.i32_0[0] >> shift_clamped;
    let dword1 = destination.i32_0[1] >> shift_clamped;
    let dword2 = destination.i32_0[2] >> shift_clamped;
    let dword3 = destination.i32_0[3] >> shift_clamped;
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
#[no_mangle]
pub unsafe fn pslld_r128(r: i32, shift: u64) {
    // pslld xmm, {shift}
    let destination = read_xmm128s(r);
    let mut dword0: i32 = 0;
    let mut dword1: i32 = 0;
    let mut dword2: i32 = 0;
    let mut dword3: i32 = 0;
    if shift <= 31 {
        dword0 = destination.i32_0[0] << shift;
        dword1 = destination.i32_0[1] << shift;
        dword2 = destination.i32_0[2] << shift;
        dword3 = destination.i32_0[3] << shift
    }
    write_xmm128(r, dword0, dword1, dword2, dword3);
}
#[no_mangle]
pub unsafe fn psrlq_r128(r: i32, shift: u64) {
    // psrlq xmm, {shift}
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8_0: [0; 16] };
    if shift <= 63 {
        result.u64_0[0] = destination.u64_0[0] >> shift;
        result.u64_0[1] = destination.u64_0[1] >> shift
    }
    write_xmm_reg128(r, result);
}
#[no_mangle]
pub unsafe fn psllq_r128(r: i32, shift: u64) {
    // psllq xmm, {shift}
    let destination = read_xmm128s(r);
    let mut result = reg128 { i8_0: [0; 16] };
    if shift <= 63 {
        result.u64_0[0] = destination.u64_0[0] << shift;
        result.u64_0[1] = destination.u64_0[1] << shift
    }
    write_xmm_reg128(r, result);
}
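// The `shift <= N` guards and clamp-to-width tricks above exist because the
// hardware shifts saturate: once the count reaches the element width, x86
// zeroes the lanes (or fills them with sign bits for the arithmetic right
// shifts), while Rust's native `<<`/`>>` panic in debug builds for counts at
// or above the bit width. A minimal sketch of the equivalence, using plain
// integers to stand in for packed lanes (test module added for illustration):
#[cfg(test)]
mod shift_semantics_tests {
    #[test]
    fn wide_counts_zero_or_saturate() {
        // Logical right shift: widening a u16 lane to u32 lets the clamped
        // count of 16 clear it completely, as psrlw_r64 does.
        let lane: u16 = 0x8001;
        assert_eq!(((lane as u32) >> 16) as u16, 0);
        // Arithmetic right shift: a count of 16 on a sign-extended i16 lane
        // leaves only sign bits, as psraw_r64 requires.
        let lane: i16 = -2;
        assert_eq!((lane as i32 >> 16) as i16, -1);
    }
}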
#[no_mangle]
pub unsafe fn sse_comparison(op: i32, x: f64, y: f64) -> bool {
    // TODO: Signaling
    match op & 7 {
        0 => return x == y,                             // cmpeq
        1 => return x < y,                              // cmplt
        2 => return x <= y,                             // cmple
        3 => return x.is_nan() || y.is_nan(),           // cmpunord
        4 => return x != y || x.is_nan() || y.is_nan(), // cmpneq
        5 => return x >= y || x.is_nan() || y.is_nan(), // cmpnlt
        6 => return x > y || x.is_nan() || y.is_nan(),  // cmpnle
        7 => return !x.is_nan() && !y.is_nan(),         // cmpord
        _ => {
            dbg_assert!(false);
            return false;
        },
    };
}
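// A small sanity sketch of the NaN behavior encoded above: the ordered
// predicates (eq/lt/le/ord) are false when either operand is NaN, while the
// negated ones (unord/neq/nlt/nle) are true. Test module added for
// illustration only:
#[cfg(test)]
mod comparison_tests {
    use super::sse_comparison;

    #[test]
    fn nan_flips_ordered_predicates() {
        let nan = f64::NAN;
        unsafe {
            assert!(!sse_comparison(0, nan, 1.0)); // cmpeq is false on NaN
            assert!(sse_comparison(3, nan, 1.0)); // cmpunord is true on NaN
            assert!(sse_comparison(4, nan, 1.0)); // cmpneq is true on NaN
            assert!(!sse_comparison(7, nan, 1.0)); // cmpord is false on NaN
        }
    }
}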
#[no_mangle]
pub unsafe fn sse_min(x: f64, y: f64) -> f64 {
    // if both x and y are 0, or if x is NaN, y is returned
    return if x < y { x } else { y };
}
#[no_mangle]
pub unsafe fn sse_max(x: f64, y: f64) -> f64 {
    // if both x and y are 0, or if x is NaN, y is returned
    return if x > y { x } else { y };
}
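// The comments above describe MINSS/MAXSS-style semantics: the comparison is
// a plain `<`/`>`, so for equal zeros or a NaN in either operand the second
// operand wins. A minimal sketch (test module added for illustration only):
#[cfg(test)]
mod min_max_tests {
    use super::{sse_max, sse_min};

    #[test]
    fn second_operand_wins_on_ties_and_nan() {
        unsafe {
            // +0.0 < -0.0 is false, so y (-0.0) is returned.
            assert!(sse_min(0.0, -0.0).is_sign_negative());
            // NaN < y is false, so y is returned.
            assert_eq!(sse_min(f64::NAN, 1.0), 1.0);
            // x > NaN is false, so y (NaN) is returned.
            assert!(sse_max(1.0, f64::NAN).is_nan());
        }
    }
}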
#[no_mangle]
pub unsafe fn sse_convert_with_truncation_f32_to_i32(x: f32) -> i32 {
    let x = x.trunc();
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return i32::MIN; // x86 integer indefinite (0x80000000)
    };
}
#[no_mangle]
pub unsafe fn sse_convert_f32_to_i32(x: f32) -> i32 {
    let x = sse_integer_round(x as f64);
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return i32::MIN; // x86 integer indefinite (0x80000000)
    };
}
#[no_mangle]
pub unsafe fn sse_convert_with_truncation_f64_to_i32(x: f64) -> i32 {
    let x = x.trunc();
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return i32::MIN; // x86 integer indefinite (0x80000000)
    };
}
#[no_mangle]
pub unsafe fn sse_convert_f64_to_i32(x: f64) -> i32 {
    let x = sse_integer_round(x);
    if x >= -2147483648.0 && x < 2147483648.0 {
        return x as i64 as i32;
    }
    else {
        // TODO: Signal
        return i32::MIN; // x86 integer indefinite (0x80000000)
    };
}
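// Out-of-range and NaN inputs fall through to the integer-indefinite value
// above, matching what the hardware truncating conversions return. A minimal
// sketch (test module added for illustration only):
#[cfg(test)]
mod convert_tests {
    use super::sse_convert_with_truncation_f64_to_i32;

    #[test]
    fn overflow_and_nan_yield_integer_indefinite() {
        unsafe {
            assert_eq!(sse_convert_with_truncation_f64_to_i32(3.9), 3);
            assert_eq!(sse_convert_with_truncation_f64_to_i32(-3.9), -3);
            assert_eq!(sse_convert_with_truncation_f64_to_i32(1e19), i32::MIN);
            assert_eq!(sse_convert_with_truncation_f64_to_i32(f64::NAN), i32::MIN);
        }
    }
}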
pub unsafe fn sse_integer_round(f: f64) -> f64 {
    // see fpu_integer_round
    let rc = *mxcsr >> MXCSR_RC_SHIFT & 3;
    if rc == 0 {
        // Round to nearest, or even if equidistant
        let mut rounded = f.round();
        let diff = rounded - f;
        if diff == 0.5 || diff == -0.5 {
            rounded = 2.0 * (f * 0.5).round()
        }
        return rounded;
    }
    else if rc == 1 || (rc == 3 && f > 0.0) {
        // rc=1 rounds down; rc=3 is truncate -> floor for positive numbers
        return f.floor();
    }
    else {
        // rc=2 rounds up; rc=3 with f <= 0 truncates -> ceil
        return f.ceil();
    };
}
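// sse_integer_round reads the live MXCSR, so a standalone test can only model
// the rc=0 branch with plain f64 arithmetic. A minimal sketch of the
// half-to-even correction used above: f64::round() rounds halfway cases away
// from zero, and the `2.0 * (f * 0.5).round()` step pulls them back to even.
// Test module and helper added for illustration only:
#[cfg(test)]
mod rounding_tests {
    fn round_half_even(f: f64) -> f64 {
        let rounded = f.round(); // rounds halfway cases away from zero
        let diff = rounded - f;
        if diff == 0.5 || diff == -0.5 {
            2.0 * (f * 0.5).round()
        }
        else {
            rounded
        }
    }

    #[test]
    fn halfway_cases_round_to_even() {
        assert_eq!(round_half_even(0.5), 0.0);
        assert_eq!(round_half_even(1.5), 2.0);
        assert_eq!(round_half_even(2.5), 2.0);
        assert_eq!(round_half_even(-0.5), 0.0);
    }
}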