fpu.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * SIMD Floating Point.
  11. * Assembler support to get at the individual instructions
  12. * is in l64fpu.s.
  13. * There are opportunities to be lazier about saving and
  14. * restoring the state and allocating the storage needed.
  15. */
  16. #include "u.h"
  17. #include "../port/lib.h"
  18. #include "mem.h"
  19. #include "dat.h"
  20. #include "fns.h"
  21. #include "amd64.h"
  22. #include "ureg.h"
  23. enum { /* FCW, FSW and MXCSR */
  24. I = 0x00000001, /* Invalid-Operation */
  25. D = 0x00000002, /* Denormalized-Operand */
  26. Z = 0x00000004, /* Zero-Divide */
  27. O = 0x00000008, /* Overflow */
  28. U = 0x00000010, /* Underflow */
  29. P = 0x00000020, /* Precision */
  30. };
  31. enum { /* FCW */
  32. PCs = 0x00000000, /* Precision Control -Single */
  33. PCd = 0x00000200, /* -Double */
  34. PCde = 0x00000300, /* -Double Extended */
  35. RCn = 0x00000000, /* Rounding Control -Nearest */
  36. RCd = 0x00000400, /* -Down */
  37. RCu = 0x00000800, /* -Up */
  38. RCz = 0x00000C00, /* -Toward Zero */
  39. };
  40. enum { /* FSW */
  41. Sff = 0x00000040, /* Stack Fault Flag */
  42. Es = 0x00000080, /* Error Summary Status */
  43. C0 = 0x00000100, /* ZF - Condition Code Bits */
  44. C1 = 0x00000200, /* O/U# */
  45. C2 = 0x00000400, /* PF */
  46. C3 = 0x00004000, /* ZF */
  47. B = 0x00008000, /* Busy */
  48. };
  49. enum { /* MXCSR */
  50. Daz = 0x00000040, /* Denormals are Zeros */
  51. Im = 0x00000080, /* I Mask */
  52. Dm = 0x00000100, /* D Mask */
  53. Zm = 0x00000200, /* Z Mask */
  54. Om = 0x00000400, /* O Mask */
  55. Um = 0x00000800, /* U Mask */
  56. Pm = 0x00001000, /* P Mask */
  57. Rn = 0x00000000, /* Round to Nearest */
  58. Rd = 0x00002000, /* Round Down */
  59. Ru = 0x00004000, /* Round Up */
  60. Rz = 0x00006000, /* Round toward Zero */
  61. Fz = 0x00008000, /* Flush to Zero for Um */
  62. };
  63. enum { /* FPU.state */
  64. Init = 0, /* The FPU has not been used */
  65. Busy = 1, /* The FPU is being used */
  66. Idle = 2, /* The FPU has been used */
  67. Hold = 4, /* Handling an FPU note */
  68. };
  69. extern void _clts(void);
  70. extern void _fldcw(uint16_t*);
  71. extern void _fnclex(void);
  72. extern void _fninit(void);
  73. extern void _fxrstor(Fxsave*);
  74. extern void _fxsave(Fxsave*);
  75. extern void _fwait(void);
  76. extern void _ldmxcsr(uint32_t*);
  77. extern void _stts(void);
  78. int
  79. fpudevprocio(Proc* proc, void* a, int32_t n, uintptr_t offset, int write)
  80. {
  81. uint8_t *p;
  82. /*
  83. * Called from procdevtab.read and procdevtab.write
  84. * allow user process access to the FPU registers.
  85. * This is the only FPU routine which is called directly
  86. * from the port code; it would be nice to have dynamic
  87. * creation of entries in the device file trees...
  88. */
  89. if(offset >= sizeof(Fxsave))
  90. return 0;
  91. if((p = proc->FPU.fpusave) == nil)
  92. return 0;
  93. switch(write){
  94. default:
  95. if(offset+n > sizeof(Fxsave))
  96. n = sizeof(Fxsave) - offset;
  97. memmove(p+offset, a, n);
  98. break;
  99. case 0:
  100. if(offset+n > sizeof(Fxsave))
  101. n = sizeof(Fxsave) - offset;
  102. memmove(a, p+offset, n);
  103. break;
  104. }
  105. return n;
  106. }
  107. void
  108. fpunotify(Ureg* u)
  109. {
  110. Proc *up = externup();
  111. /*
  112. * Called when a note is about to be delivered to a
  113. * user process, usually at the end of a system call.
  114. * Note handlers are not allowed to use the FPU so
  115. * the state is marked (after saving if necessary) and
  116. * checked in the Device Not Available handler.
  117. */
  118. if(up->FPU.fpustate == Busy){
  119. _fxsave(up->FPU.fpusave);
  120. _stts();
  121. up->FPU.fpustate = Idle;
  122. }
  123. up->FPU.fpustate |= Hold;
  124. }
  125. void
  126. fpunoted(void)
  127. {
  128. Proc *up = externup();
  129. /*
  130. * Called from sysnoted() via the machine-dependent
  131. * noted() routine.
  132. * Clear the flag set above in fpunotify().
  133. */
  134. up->FPU.fpustate &= ~Hold;
  135. }
  136. void
  137. fpusysrfork(Ureg* u)
  138. {
  139. Proc *up = externup();
  140. /*
  141. * Called early in the non-interruptible path of
  142. * sysrfork() via the machine-dependent syscall() routine.
  143. * Save the state so that it can be easily copied
  144. * to the child process later.
  145. */
  146. if(up->FPU.fpustate != Busy)
  147. return;
  148. _fxsave(up->FPU.fpusave);
  149. _stts();
  150. up->FPU.fpustate = Idle;
  151. }
  152. void
  153. fpusysrforkchild(Proc* child, Proc* parent)
  154. {
  155. Proc *up = externup();
  156. /*
  157. * Called later in sysrfork() via the machine-dependent
  158. * sysrforkchild() routine.
  159. * Copy the parent FPU state to the child.
  160. */
  161. child->FPU.fpustate = parent->FPU.fpustate;
  162. child->FPU.fpusave = (void*)((PTR2UINT(up->FPU.fxsave) + 15) & ~15);
  163. if(child->FPU.fpustate == Init)
  164. return;
  165. memmove(child->FPU.fpusave, parent->FPU.fpusave, sizeof(Fxsave));
  166. }
  167. void
  168. fpuprocsave(Proc* p)
  169. {
  170. /*
  171. * Called from sched() and sleep() via the machine-dependent
  172. * procsave() routine.
  173. * About to go in to the scheduler.
  174. * If the process wasn't using the FPU
  175. * there's nothing to do.
  176. */
  177. if(p->FPU.fpustate != Busy)
  178. return;
  179. /*
  180. * The process is dead so clear and disable the FPU
  181. * and set the state for whoever gets this proc struct
  182. * next.
  183. */
  184. if(p->state == Moribund){
  185. _clts();
  186. _fnclex();
  187. _stts();
  188. p->FPU.fpustate = Init;
  189. return;
  190. }
  191. /*
  192. * Save the FPU state without handling pending
  193. * unmasked exceptions and disable. Postnote() can't
  194. * be called here as sleep() already has up->rlock,
  195. * so the handling of pending exceptions is delayed
  196. * until the process runs again and generates a
  197. * Device Not Available exception fault to activate
  198. * the FPU.
  199. */
  200. _fxsave(p->FPU.fpusave);
  201. _stts();
  202. p->FPU.fpustate = Idle;
  203. }
  204. void
  205. fpuprocrestore(Proc* p)
  206. {
  207. /*
  208. * The process has been rescheduled and is about to run.
  209. * Nothing to do here right now. If the process tries to use
  210. * the FPU again it will cause a Device Not Available
  211. * exception and the state will then be restored.
  212. */
  213. USED(p);
  214. }
  215. void
  216. fpusysprocsetup(Proc* p)
  217. {
  218. /*
  219. * Disable the FPU.
  220. * Called from sysexec() via sysprocsetup() to
  221. * set the FPU for the new process.
  222. */
  223. if(p->FPU.fpustate != Init){
  224. _clts();
  225. _fnclex();
  226. _stts();
  227. p->FPU.fpustate = Init;
  228. }
  229. }
  230. void
  231. acfpusysprocsetup(Proc *p)
  232. {
  233. if(p->FPU.fpustate == Init){
  234. /* The FPU is initialized in the TC but we must initialize
  235. * it in the AC.
  236. */
  237. p->FPU.fpustate = Idle;
  238. fpusysprocsetup(p);
  239. }
  240. }
  241. static char*
  242. fpunote(void)
  243. {
  244. Proc *up = externup();
  245. uint16_t fsw;
  246. Fxsave *fpusave;
  247. char *cm;
  248. /*
  249. * The Sff bit is sticky, meaning it should be explicitly
  250. * cleared or there's no way to tell if the exception was an
  251. * invalid operation or a stack fault.
  252. */
  253. fpusave = up->FPU.fpusave;
  254. fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I);
  255. if(fsw & I){
  256. if(fsw & Sff){
  257. if(fsw & C1)
  258. cm = "Stack Overflow";
  259. else
  260. cm = "Stack Underflow";
  261. }
  262. else
  263. cm = "Invalid Operation";
  264. }
  265. else if(fsw & D)
  266. cm = "Denormal Operand";
  267. else if(fsw & Z)
  268. cm = "Divide-By-Zero";
  269. else if(fsw & O)
  270. cm = "Numeric Overflow";
  271. else if(fsw & U)
  272. cm = "Numeric Underflow";
  273. else if(fsw & P)
  274. cm = "Precision";
  275. else
  276. cm = "Unknown";
  277. snprint(up->genbuf, sizeof(up->genbuf),
  278. "sys: fp: %s Exception ipo=%#llux fsw=%#x",
  279. cm, fpusave->rip, fsw);
  280. return up->genbuf;
  281. }
  282. char*
  283. xfpuxf(Ureg* ureg, void* v)
  284. {
  285. Proc *up = externup();
  286. uint32_t mxcsr;
  287. Fxsave *fpusave;
  288. char *cm;
  289. /*
  290. * #XF - SIMD Floating Point Exception (Vector 18).
  291. */
  292. /*
  293. * Save FPU state to check out the error.
  294. */
  295. fpusave = up->FPU.fpusave;
  296. _fxsave(fpusave);
  297. _stts();
  298. up->FPU.fpustate = Idle;
  299. if(ureg->ip & KZERO)
  300. panic("#MF: ip=%#p", ureg->ip);
  301. /*
  302. * Notify the user process.
  303. * The path here is similar to the x87 path described
  304. * in fpupostnote above but without the fpupostnote()
  305. * call.
  306. */
  307. mxcsr = fpusave->mxcsr;
  308. if((mxcsr & (Im|I)) == I)
  309. cm = "Invalid Operation";
  310. else if((mxcsr & (Dm|D)) == D)
  311. cm = "Denormal Operand";
  312. else if((mxcsr & (Zm|Z)) == Z)
  313. cm = "Divide-By-Zero";
  314. else if((mxcsr & (Om|O)) == O)
  315. cm = "Numeric Overflow";
  316. else if((mxcsr & (Um|U)) == U)
  317. cm = "Numeric Underflow";
  318. else if((mxcsr & (Pm|P)) == P)
  319. cm = "Precision";
  320. else
  321. cm = "Unknown";
  322. snprint(up->genbuf, sizeof(up->genbuf),
  323. "sys: fp: %s Exception mxcsr=%#x", cm, mxcsr);
  324. return up->genbuf;
  325. }
  326. void
  327. fpuxf(Ureg *ureg, void *p)
  328. {
  329. Proc *up = externup();
  330. char *n;
  331. n = xfpuxf(ureg, p);
  332. if(n != nil)
  333. postnote(up, 1, n, NDebug);
  334. }
  335. char*
  336. acfpuxf(Ureg *ureg, void *p)
  337. {
  338. return xfpuxf(ureg, p);
  339. }
  340. static char*
  341. xfpumf(Ureg* ureg, void* v)
  342. {
  343. Proc *up = externup();
  344. Fxsave *fpusave;
  345. /*
  346. * #MF - x87 Floating Point Exception Pending (Vector 16).
  347. */
  348. /*
  349. * Save FPU state to check out the error.
  350. */
  351. fpusave = up->FPU.fpusave;
  352. _fxsave(fpusave);
  353. _stts();
  354. up->FPU.fpustate = Idle;
  355. if(ureg->ip & KZERO)
  356. panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip);
  357. /*
  358. * Notify the user process.
  359. * The path here is
  360. * call trap->fpumf->fpupostnote->postnote
  361. * return ->fpupostnote->fpumf->trap
  362. * call notify->fpunotify
  363. * return ->notify
  364. * then either
  365. * call pexit
  366. * or
  367. * return ->trap
  368. * return ->user note handler
  369. */
  370. return fpunote();
  371. }
  372. void
  373. fpumf(Ureg *ureg, void *p)
  374. {
  375. Proc *up = externup();
  376. char *n;
  377. n = xfpumf(ureg, p);
  378. if(n != nil)
  379. postnote(up, 1, n, NDebug);
  380. }
  381. char*
  382. acfpumf(Ureg *ureg, void *p)
  383. {
  384. return xfpumf(ureg, p);
  385. }
static char*
xfpunm(Ureg* ureg, void* v)
{
	Proc *up = externup();
	Fxsave *fpusave;

	/*
	 * #NM - Device Not Available (Vector 7).
	 *
	 * Raised when the FPU is used while it is disabled.
	 * Depending on the FPU state of the current process this
	 * either initialises the FPU (Init), reloads the saved
	 * state (Idle), or refuses.  Returns nil when the FPU has
	 * been handed to the process, otherwise the text of a
	 * note for the caller to deliver.
	 */
	if(up == nil)
		panic("#NM: fpu in kernel: ip %#p\n", ureg->ip);

	/*
	 * Someone tried to use the FPU in a note handler.
	 * That's a no-no.
	 * (Hold is set by fpunotify() and cleared by fpunoted().)
	 */
	if(up->FPU.fpustate & Hold)
		return "sys: floating point in note handler";

	/* FPU use at a kernel address is fatal */
	if(ureg->ip & KZERO)
		panic("#NM: proc %d %s state %d ip %#p\n",
			up->pid, up->text, up->FPU.fpustate, ureg->ip);

	switch(up->FPU.fpustate){
	case Busy:
	default:
		/*
		 * Busy means the FPU should already be enabled for
		 * this process, so the fault is unexpected; any
		 * other value is not a known state.
		 */
		panic("#NM: state %d ip %#p\n", up->FPU.fpustate, ureg->ip);
		break;
	case Init:
		/*
		 * A process tries to use the FPU for the
		 * first time and generates a 'device not available'
		 * exception.
		 * Turn the FPU on and initialise it for use.
		 * Set the precision and mask the exceptions
		 * we don't care about from the generic Mach value.
		 */
		_clts();
		_fninit();
		_fwait();
		_fldcw(&machp()->FPU.fcw);
		_ldmxcsr(&machp()->FPU.mxcsr);
		/* point fpusave at the 16-byte aligned part of the fxsave buffer */
		up->FPU.fpusave = (void*)((PTR2UINT(up->FPU.fxsave) + 15) & ~15);
		up->FPU.fpustate = Busy;
		break;
	case Idle:
		/*
		 * Before restoring the state, check for any pending
		 * exceptions, there's no way to restore the state without
		 * generating an unmasked exception.
		 * If one is pending the state is not restored and the
		 * process stays Idle; the note text is returned instead.
		 */
		fpusave = up->FPU.fpusave;
		if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I))
			return fpunote();

		/*
		 * Sff is sticky.
		 * NOTE(review): Sff is defined above as an FSW bit, yet
		 * the statement below clears that bit position in the
		 * saved control word (fcw), not the status word (fsw).
		 * Presumably fsw was intended - confirm before changing,
		 * since it alters the state handed back to the process.
		 */
		fpusave->fcw &= ~Sff;
		_clts();
		_fxrstor(fpusave);
		up->FPU.fpustate = Busy;
		break;
	}

	return nil;
}
  447. void
  448. fpunm(Ureg *ureg, void *p)
  449. {
  450. Proc *up = externup();
  451. char *n;
  452. n = xfpunm(ureg, p);
  453. if(n != nil)
  454. postnote(up, 1, n, NDebug);
  455. }
  456. char*
  457. acfpunm(Ureg *ureg, void *p)
  458. {
  459. return xfpunm(ureg, p);
  460. }
  461. void
  462. fpuinit(void)
  463. {
  464. uint64_t r;
  465. Fxsave *fxsave;
  466. uint8_t buf[sizeof(Fxsave)+15];
  467. /*
  468. * It's assumed there is an integrated FPU, so Em is cleared;
  469. */
  470. r = cr0get();
  471. r &= ~(Ts|Em);
  472. r |= Ne|Mp;
  473. cr0put(r);
  474. r = cr4get();
  475. r |= Osxmmexcpt|Osfxsr;
  476. cr4put(r);
  477. _fninit();
  478. fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15);
  479. memset(fxsave, 0, sizeof(Fxsave));
  480. _fxsave(fxsave);
  481. machp()->FPU.fcw = RCn|PCd|P|U|D;
  482. if(fxsave->mxcsrmask == 0)
  483. machp()->FPU.mxcsrmask = 0x0000FFBF;
  484. else
  485. machp()->FPU.mxcsrmask = fxsave->mxcsrmask;
  486. machp()->FPU.mxcsr = (Rn|Pm|Um|Dm) & machp()->FPU.mxcsrmask;
  487. _stts();
  488. if(machp()->machno != 0)
  489. return;
  490. /*
  491. * Set up the exception handlers.
  492. */
  493. trapenable(IdtNM, fpunm, 0, "#NM");
  494. trapenable(IdtMF, fpumf, 0, "#MF");
  495. trapenable(IdtXF, fpuxf, 0, "#XF");
  496. /* Same thing, for the AC */
  497. actrapenable(IdtNM, acfpunm, 0, "#NM");
  498. actrapenable(IdtMF, acfpumf, 0, "#MF");
  499. actrapenable(IdtXF, acfpuxf, 0, "#XF");
  500. }