fpu.c.old 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. /*
  2. * SIMD Floating Point.
  3. * Assembler support to get at the individual instructions
  4. * is in l64fpu.s.
  5. * There are opportunities to be lazier about saving and
  6. * restoring the state and allocating the storage needed.
  7. */
  8. #include "u.h"
  9. #include "../port/lib.h"
  10. #include "mem.h"
  11. #include "dat.h"
  12. #include "fns.h"
  13. #include "amd64.h"
  14. #include "ureg.h"
  15. enum { /* FCW, FSW and MXCSR */
  16. I = 0x00000001, /* Invalid-Operation */
  17. D = 0x00000002, /* Denormalized-Operand */
  18. Z = 0x00000004, /* Zero-Divide */
  19. O = 0x00000008, /* Overflow */
  20. U = 0x00000010, /* Underflow */
  21. P = 0x00000020, /* Precision */
  22. };
  23. enum { /* FCW */
  24. PCs = 0x00000000, /* Precision Control -Single */
  25. PCd = 0x00000200, /* -Double */
  26. PCde = 0x00000300, /* -Double Extended */
  27. RCn = 0x00000000, /* Rounding Control -Nearest */
  28. RCd = 0x00000400, /* -Down */
  29. RCu = 0x00000800, /* -Up */
  30. RCz = 0x00000C00, /* -Toward Zero */
  31. };
  32. enum { /* FSW */
  33. Sff = 0x00000040, /* Stack Fault Flag */
  34. Es = 0x00000080, /* Error Summary Status */
  35. C0 = 0x00000100, /* ZF - Condition Code Bits */
  36. C1 = 0x00000200, /* O/U# */
  37. C2 = 0x00000400, /* PF */
  38. C3 = 0x00004000, /* ZF */
  39. B = 0x00008000, /* Busy */
  40. };
  41. enum { /* MXCSR */
  42. Daz = 0x00000040, /* Denormals are Zeros */
  43. Im = 0x00000080, /* I Mask */
  44. Dm = 0x00000100, /* D Mask */
  45. Zm = 0x00000200, /* Z Mask */
  46. Om = 0x00000400, /* O Mask */
  47. Um = 0x00000800, /* U Mask */
  48. Pm = 0x00001000, /* P Mask */
  49. Rn = 0x00000000, /* Round to Nearest */
  50. Rd = 0x00002000, /* Round Down */
  51. Ru = 0x00004000, /* Round Up */
  52. Rz = 0x00006000, /* Round toward Zero */
  53. Fz = 0x00008000, /* Flush to Zero for Um */
  54. };
  55. enum { /* PFPU.state */
  56. Init = 0, /* The FPU has not been used */
  57. Busy = 1, /* The FPU is being used */
  58. Idle = 2, /* The FPU has been used */
  59. Hold = 4, /* Handling an FPU note */
  60. };
  61. extern void _clts(void);
  62. extern void _fldcw(u16int);
  63. extern void _fnclex(void);
  64. extern void _fninit(void);
  65. extern void _fxrstor(Fxsave*);
  66. extern void _fxsave(Fxsave*);
  67. extern void _fwait(void);
  68. extern void _ldmxcsr(u32int);
  69. extern void _stts(void);
  70. int
  71. fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
  72. {
  73. uchar *p;
  74. /*
  75. * Called from procdevtab.read and procdevtab.write
  76. * allow user process access to the FPU registers.
  77. * This is the only FPU routine which is called directly
  78. * from the port code; it would be nice to have dynamic
  79. * creation of entries in the device file trees...
  80. */
  81. if(offset >= sizeof(Fxsave))
  82. return 0;
  83. if((p = proc->fpusave) == nil)
  84. return 0;
  85. switch(write){
  86. default:
  87. if(offset+n > sizeof(Fxsave))
  88. n = sizeof(Fxsave) - offset;
  89. memmove(p+offset, a, n);
  90. break;
  91. case 0:
  92. if(offset+n > sizeof(Fxsave))
  93. n = sizeof(Fxsave) - offset;
  94. memmove(a, p+offset, n);
  95. break;
  96. }
  97. return n;
  98. }
  99. void
  100. fpunotify(Ureg*)
  101. {
  102. /*
  103. * Called when a note is about to be delivered to a
  104. * user process, usually at the end of a system call.
  105. * Note handlers are not allowed to use the FPU so
  106. * the state is marked (after saving if necessary) and
  107. * checked in the Device Not Available handler.
  108. */
  109. if(up->fpustate == Busy){
  110. _fxsave(up->fpusave);
  111. _stts();
  112. up->fpustate = Idle;
  113. }
  114. up->fpustate |= Hold;
  115. }
  116. void
  117. fpunoted(void)
  118. {
  119. /*
  120. * Called from sysnoted() via the machine-dependent
  121. * noted() routine.
  122. * Clear the flag set above in fpunotify().
  123. */
  124. up->fpustate &= ~Hold;
  125. }
  126. void
  127. fpusysrfork(Ureg*)
  128. {
  129. /*
  130. * Called early in the non-interruptible path of
  131. * sysrfork() via the machine-dependent syscall() routine.
  132. * Save the state so that it can be easily copied
  133. * to the child process later.
  134. */
  135. if(up->fpustate != Busy)
  136. return;
  137. _fxsave(up->fpusave);
  138. _stts();
  139. up->fpustate = Idle;
  140. }
  141. void
  142. fpusysrforkchild(Proc* child, Proc* parent)
  143. {
  144. /*
  145. * Called later in sysrfork() via the machine-dependent
  146. * sysrforkchild() routine.
  147. * Copy the parent FPU state to the child.
  148. */
  149. child->fpustate = parent->fpustate;
  150. child->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15);
  151. if(child->fpustate == Init)
  152. return;
  153. memmove(child->fpusave, parent->fpusave, sizeof(Fxsave));
  154. }
  155. void
  156. fpuprocsave(Proc* p)
  157. {
  158. /*
  159. * Called from sched() and sleep() via the machine-dependent
  160. * procsave() routine.
  161. * About to go in to the scheduler.
  162. * If the process wasn't using the FPU
  163. * there's nothing to do.
  164. */
  165. if(p->fpustate != Busy)
  166. return;
  167. /*
  168. * The process is dead so clear and disable the FPU
  169. * and set the state for whoever gets this proc struct
  170. * next.
  171. */
  172. if(p->state == Moribund){
  173. _clts();
  174. _fnclex();
  175. _stts();
  176. p->fpustate = Init;
  177. return;
  178. }
  179. /*
  180. * Save the FPU state without handling pending
  181. * unmasked exceptions and disable. Postnote() can't
  182. * be called here as sleep() already has up->rlock,
  183. * so the handling of pending exceptions is delayed
  184. * until the process runs again and generates a
  185. * Device Not Available exception fault to activate
  186. * the FPU.
  187. */
  188. _fxsave(p->fpusave);
  189. _stts();
  190. p->fpustate = Idle;
  191. }
  192. void
  193. fpuprocrestore(Proc* p)
  194. {
  195. /*
  196. * The process has been rescheduled and is about to run.
  197. * Nothing to do here right now. If the process tries to use
  198. * the FPU again it will cause a Device Not Available
  199. * exception and the state will then be restored.
  200. */
  201. USED(p);
  202. }
  203. void
  204. fpusysprocsetup(Proc* p)
  205. {
  206. /*
  207. * Disable the FPU.
  208. * Called from sysexec() via sysprocsetup() to
  209. * set the FPU for the new process.
  210. */
  211. if(p->fpustate != Init){
  212. _clts();
  213. _fnclex();
  214. _stts();
  215. p->fpustate = Init;
  216. }
  217. }
  218. static void
  219. fpupostnote(void)
  220. {
  221. ushort fsw;
  222. Fxsave *fpusave;
  223. char *m, n[ERRMAX];
  224. /*
  225. * The Sff bit is sticky, meaning it should be explicitly
  226. * cleared or there's no way to tell if the exception was an
  227. * invalid operation or a stack fault.
  228. */
  229. fpusave = up->fpusave;
  230. fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I);
  231. if(fsw & I){
  232. if(fsw & Sff){
  233. if(fsw & C1)
  234. m = "Stack Overflow";
  235. else
  236. m = "Stack Underflow";
  237. }
  238. else
  239. m = "Invalid Operation";
  240. }
  241. else if(fsw & D)
  242. m = "Denormal Operand";
  243. else if(fsw & Z)
  244. m = "Divide-By-Zero";
  245. else if(fsw & O)
  246. m = "Numeric Overflow";
  247. else if(fsw & U)
  248. m = "Numeric Underflow";
  249. else if(fsw & P)
  250. m = "Precision";
  251. else
  252. m = "Unknown";
  253. snprint(n, sizeof(n), "sys: fp: %s Exception ipo=%#llux fsw=%#ux",
  254. m, fpusave->rip, fsw);
  255. postnote(up, 1, n, NDebug);
  256. }
  257. void
  258. fpuxf(Ureg* ureg, void*)
  259. {
  260. u32int mxcsr;
  261. Fxsave *fpusave;
  262. char *m, n[ERRMAX];
  263. /*
  264. * #XF - SIMD Floating Point Exception (Vector 18).
  265. */
  266. /*
  267. * Save FPU state to check out the error.
  268. */
  269. fpusave = up->fpusave;
  270. _fxsave(fpusave);
  271. _stts();
  272. up->fpustate = Idle;
  273. if(ureg->ip & KZERO)
  274. panic("#MF: ip=%#p", ureg->ip);
  275. /*
  276. * Notify the user process.
  277. * The path here is similar to the x87 path described
  278. * in fpupostnote above but without the fpupostnote()
  279. * call.
  280. */
  281. mxcsr = fpusave->mxcsr;
  282. if((mxcsr & (Im|I)) == I)
  283. m = "Invalid Operation";
  284. else if((mxcsr & (Dm|D)) == D)
  285. m = "Denormal Operand";
  286. else if((mxcsr & (Zm|Z)) == Z)
  287. m = "Divide-By-Zero";
  288. else if((mxcsr & (Om|O)) == O)
  289. m = "Numeric Overflow";
  290. else if((mxcsr & (Um|U)) == U)
  291. m = "Numeric Underflow";
  292. else if((mxcsr & (Pm|P)) == P)
  293. m = "Precision";
  294. else
  295. m = "Unknown";
  296. snprint(n, sizeof(n), "sys: fp: %s Exception mxcsr=%#ux", m, mxcsr);
  297. postnote(up, 1, n, NDebug);
  298. }
  299. void
  300. fpumf(Ureg* ureg, void*)
  301. {
  302. Fxsave *fpusave;
  303. /*
  304. * #MF - x87 Floating Point Exception Pending (Vector 16).
  305. */
  306. /*
  307. * Save FPU state to check out the error.
  308. */
  309. fpusave = up->fpusave;
  310. _fxsave(fpusave);
  311. _stts();
  312. up->fpustate = Idle;
  313. if(ureg->ip & KZERO)
  314. panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip);
  315. /*
  316. * Notify the user process.
  317. * The path here is
  318. * call trap->fpumf->fpupostnote->postnote
  319. * return ->fpupostnote->fpumf->trap
  320. * call notify->fpunotify
  321. * return ->notify
  322. * then either
  323. * call pexit
  324. * or
  325. * return ->trap
  326. * return ->user note handler
  327. */
  328. fpupostnote();
  329. }
  330. void
  331. fpunm(Ureg* ureg, void*)
  332. {
  333. Fxsave *fpusave;
  334. /*
  335. * #NM - Device Not Available (Vector 7).
  336. */
  337. if(up == nil)
  338. panic("#NM: fpu in kernel: ip %#p\n", ureg->ip);
  339. /*
  340. * Someone tried to use the FPU in a note handler.
  341. * That's a no-no.
  342. */
  343. if(up->fpustate & Hold){
  344. postnote(up, 1, "sys: floating point in note handler", NDebug);
  345. return;
  346. }
  347. if(ureg->ip & KZERO)
  348. panic("#NM: proc %d %s state %d ip %#p\n",
  349. up->pid, up->text, up->fpustate, ureg->ip);
  350. switch(up->fpustate){
  351. case Busy:
  352. default:
  353. panic("#NM: state %d ip %#p\n", up->fpustate, ureg->ip);
  354. break;
  355. case Init:
  356. /*
  357. * A process tries to use the FPU for the
  358. * first time and generates a 'device not available'
  359. * exception.
  360. * Turn the FPU on and initialise it for use.
  361. * Set the precision and mask the exceptions
  362. * we don't care about from the generic Mach value.
  363. */
  364. _clts();
  365. _fninit();
  366. _fwait();
  367. _fldcw(m->fcw);
  368. _ldmxcsr(m->mxcsr);
  369. up->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15);
  370. up->fpustate = Busy;
  371. break;
  372. case Idle:
  373. /*
  374. * Before restoring the state, check for any pending
  375. * exceptions, there's no way to restore the state without
  376. * generating an unmasked exception.
  377. */
  378. fpusave = up->fpusave;
  379. if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I)){
  380. fpupostnote();
  381. break;
  382. }
  383. /*
  384. * Sff is sticky.
  385. */
  386. fpusave->fcw &= ~Sff;
  387. _clts();
  388. _fxrstor(fpusave);
  389. up->fpustate = Busy;
  390. break;
  391. }
  392. }
  393. void
  394. fpuinit(void)
  395. {
  396. u64int r;
  397. Fxsave *fxsave;
  398. uchar buf[sizeof(Fxsave)+15];
  399. /*
  400. * It's assumed there is an integrated FPU, so Em is cleared;
  401. */
  402. r = cr0get();
  403. r &= ~(Ts|Em);
  404. r |= Ne|Mp;
  405. cr0put(r);
  406. r = cr4get();
  407. r |= Osxmmexcpt|Osfxsr;
  408. cr4put(r);
  409. _fninit();
  410. fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15);
  411. memset(fxsave, 0, sizeof(Fxsave));
  412. _fxsave(fxsave);
  413. m->fcw = RCn|PCd|P|U|D;
  414. if(fxsave->mxcsrmask == 0)
  415. m->mxcsrmask = 0x0000FFBF;
  416. else
  417. m->mxcsrmask = fxsave->mxcsrmask;
  418. m->mxcsr = (Rn|Pm|Um|Dm) & m->mxcsrmask;
  419. _stts();
  420. if(machp()->machno != 0)
  421. return;
  422. /*
  423. * Set up the exception handlers.
  424. */
  425. trapenable(IdtNM, fpunm, 0, "#NM");
  426. trapenable(IdtMF, fpumf, 0, "#MF");
  427. trapenable(IdtXF, fpuxf, 0, "#XF");
  428. }