123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560 |
- /*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
- /*
- * SIMD Floating Point.
- * Assembler support to get at the individual instructions
- * is in l64fpu.s.
- * There are opportunities to be lazier about saving and
- * restoring the state and allocating the storage needed.
- */
- #include "u.h"
- #include "../port/lib.h"
- #include "mem.h"
- #include "dat.h"
- #include "fns.h"
- #include "amd64.h"
- #include "ureg.h"
/*
 * Exception flag bits.  The same six bit positions are used
 * in the x87 control word (FCW), the x87 status word (FSW)
 * and in MXCSR.
 */
enum {
	I	= 1<<0,		/* Invalid-Operation */
	D	= 1<<1,		/* Denormalized-Operand */
	Z	= 1<<2,		/* Zero-Divide */
	O	= 1<<3,		/* Overflow */
	U	= 1<<4,		/* Underflow */
	P	= 1<<5,		/* Precision */
};
/*
 * FCW-only fields: precision control (bits 8-9) and
 * rounding control (bits 10-11).
 */
enum {
	PCs	= 0<<8,		/* Precision Control: Single */
	PCd	= 2<<8,		/* Precision Control: Double */
	PCde	= 3<<8,		/* Precision Control: Double Extended */

	RCn	= 0<<10,	/* Rounding Control: Nearest */
	RCd	= 1<<10,	/* Rounding Control: Down */
	RCu	= 2<<10,	/* Rounding Control: Up */
	RCz	= 3<<10,	/* Rounding Control: Toward Zero */
};
/*
 * FSW-only bits (the low six exception flags are shared
 * with FCW and MXCSR and are defined separately).
 */
enum {
	Sff	= 1<<6,		/* Stack Fault Flag */
	Es	= 1<<7,		/* Error Summary Status */
	C0	= 1<<8,		/* Condition Code bit 0 */
	C1	= 1<<9,		/* Condition Code bit 1; O/U# on a stack fault */
	C2	= 1<<10,	/* Condition Code bit 2 */
	C3	= 1<<14,	/* Condition Code bit 3 */
	B	= 1<<15,	/* Busy */
};
/*
 * MXCSR-only bits: denormal handling, the per-exception
 * mask bits (bits 7-12), rounding mode (bits 13-14) and
 * flush-to-zero.
 */
enum {
	Daz	= 1<<6,		/* Denormals are Zeros */

	Im	= 1<<7,		/* I Mask */
	Dm	= 1<<8,		/* D Mask */
	Zm	= 1<<9,		/* Z Mask */
	Om	= 1<<10,	/* O Mask */
	Um	= 1<<11,	/* U Mask */
	Pm	= 1<<12,	/* P Mask */

	Rn	= 0<<13,	/* Round to Nearest */
	Rd	= 1<<13,	/* Round Down */
	Ru	= 2<<13,	/* Round Up */
	Rz	= 3<<13,	/* Round toward Zero */

	Fz	= 1<<15,	/* Flush to Zero for Um */
};
/*
 * Values of Proc FPU.fpustate.  Init, Busy and Idle are
 * mutually exclusive; Hold is a flag that is or'ed in
 * while a note is being handled.
 */
enum {
	Init	= 0,		/* The FPU has not been used */
	Busy,			/* (1) The FPU is being used */
	Idle,			/* (2) The FPU has been used */
	Hold	= 1<<2,		/* (4) Handling an FPU note */
};
- extern void _clts(void);
- extern void _fldcw(uint16_t*);
- extern void _fnclex(void);
- extern void _fninit(void);
- extern void _fxrstor(Fxsave*);
- extern void _fxsave(Fxsave*);
- extern void _fwait(void);
- extern void _ldmxcsr(uint32_t*);
- extern void _stts(void);
- int
- fpudevprocio(Proc* proc, void* a, int32_t n, uintptr_t offset, int write)
- {
- uint8_t *p;
- /*
- * Called from procdevtab.read and procdevtab.write
- * allow user process access to the FPU registers.
- * This is the only FPU routine which is called directly
- * from the port code; it would be nice to have dynamic
- * creation of entries in the device file trees...
- */
- if(offset >= sizeof(Fxsave))
- return 0;
- if((p = proc->FPU.fpusave) == nil)
- return 0;
- switch(write){
- default:
- if(offset+n > sizeof(Fxsave))
- n = sizeof(Fxsave) - offset;
- memmove(p+offset, a, n);
- break;
- case 0:
- if(offset+n > sizeof(Fxsave))
- n = sizeof(Fxsave) - offset;
- memmove(a, p+offset, n);
- break;
- }
- return n;
- }
- void
- fpunotify(Ureg* u)
- {
- Proc *up = externup();
- /*
- * Called when a note is about to be delivered to a
- * user process, usually at the end of a system call.
- * Note handlers are not allowed to use the FPU so
- * the state is marked (after saving if necessary) and
- * checked in the Device Not Available handler.
- */
- if(up->FPU.fpustate == Busy){
- _fxsave(up->FPU.fpusave);
- _stts();
- up->FPU.fpustate = Idle;
- }
- up->FPU.fpustate |= Hold;
- }
- void
- fpunoted(void)
- {
- Proc *up = externup();
- /*
- * Called from sysnoted() via the machine-dependent
- * noted() routine.
- * Clear the flag set above in fpunotify().
- */
- up->FPU.fpustate &= ~Hold;
- }
- void
- fpusysrfork(Ureg* u)
- {
- Proc *up = externup();
- /*
- * Called early in the non-interruptible path of
- * sysrfork() via the machine-dependent syscall() routine.
- * Save the state so that it can be easily copied
- * to the child process later.
- */
- if(up->FPU.fpustate != Busy)
- return;
- _fxsave(up->FPU.fpusave);
- _stts();
- up->FPU.fpustate = Idle;
- }
- void
- fpusysrforkchild(Proc* child, Proc* parent)
- {
- Proc *up = externup();
- /*
- * Called later in sysrfork() via the machine-dependent
- * sysrforkchild() routine.
- * Copy the parent FPU state to the child.
- */
- child->FPU.fpustate = parent->FPU.fpustate;
- child->FPU.fpusave = (void*)((PTR2UINT(up->FPU.fxsave) + 15) & ~15);
- if(child->FPU.fpustate == Init)
- return;
- memmove(child->FPU.fpusave, parent->FPU.fpusave, sizeof(Fxsave));
- }
- void
- fpuprocsave(Proc* p)
- {
- /*
- * Called from sched() and sleep() via the machine-dependent
- * procsave() routine.
- * About to go in to the scheduler.
- * If the process wasn't using the FPU
- * there's nothing to do.
- */
- if(p->FPU.fpustate != Busy)
- return;
- /*
- * The process is dead so clear and disable the FPU
- * and set the state for whoever gets this proc struct
- * next.
- */
- if(p->state == Moribund){
- _clts();
- _fnclex();
- _stts();
- p->FPU.fpustate = Init;
- return;
- }
- /*
- * Save the FPU state without handling pending
- * unmasked exceptions and disable. Postnote() can't
- * be called here as sleep() already has up->rlock,
- * so the handling of pending exceptions is delayed
- * until the process runs again and generates a
- * Device Not Available exception fault to activate
- * the FPU.
- */
- _fxsave(p->FPU.fpusave);
- _stts();
- p->FPU.fpustate = Idle;
- }
- void
- fpuprocrestore(Proc* p)
- {
- /*
- * The process has been rescheduled and is about to run.
- * Nothing to do here right now. If the process tries to use
- * the FPU again it will cause a Device Not Available
- * exception and the state will then be restored.
- */
- USED(p);
- }
- void
- fpusysprocsetup(Proc* p)
- {
- /*
- * Disable the FPU.
- * Called from sysexec() via sysprocsetup() to
- * set the FPU for the new process.
- */
- if(p->FPU.fpustate != Init){
- _clts();
- _fnclex();
- _stts();
- p->FPU.fpustate = Init;
- }
- }
- void
- acfpusysprocsetup(Proc *p)
- {
- if(p->FPU.fpustate == Init){
- /* The FPU is initialized in the TC but we must initialize
- * it in the AC.
- */
- p->FPU.fpustate = Idle;
- fpusysprocsetup(p);
- }
- }
- static char*
- fpunote(void)
- {
- Proc *up = externup();
- uint16_t fsw;
- Fxsave *fpusave;
- char *cm;
- /*
- * The Sff bit is sticky, meaning it should be explicitly
- * cleared or there's no way to tell if the exception was an
- * invalid operation or a stack fault.
- */
- fpusave = up->FPU.fpusave;
- fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I);
- if(fsw & I){
- if(fsw & Sff){
- if(fsw & C1)
- cm = "Stack Overflow";
- else
- cm = "Stack Underflow";
- }
- else
- cm = "Invalid Operation";
- }
- else if(fsw & D)
- cm = "Denormal Operand";
- else if(fsw & Z)
- cm = "Divide-By-Zero";
- else if(fsw & O)
- cm = "Numeric Overflow";
- else if(fsw & U)
- cm = "Numeric Underflow";
- else if(fsw & P)
- cm = "Precision";
- else
- cm = "Unknown";
- snprint(up->genbuf, sizeof(up->genbuf),
- "sys: fp: %s Exception ipo=%#llux fsw=%#x",
- cm, fpusave->rip, fsw);
- return up->genbuf;
- }
- char*
- xfpuxf(Ureg* ureg, void* v)
- {
- Proc *up = externup();
- uint32_t mxcsr;
- Fxsave *fpusave;
- char *cm;
- /*
- * #XF - SIMD Floating Point Exception (Vector 18).
- */
- /*
- * Save FPU state to check out the error.
- */
- fpusave = up->FPU.fpusave;
- _fxsave(fpusave);
- _stts();
- up->FPU.fpustate = Idle;
- if(ureg->ip & KZERO)
- panic("#MF: ip=%#p", ureg->ip);
- /*
- * Notify the user process.
- * The path here is similar to the x87 path described
- * in fpupostnote above but without the fpupostnote()
- * call.
- */
- mxcsr = fpusave->mxcsr;
- if((mxcsr & (Im|I)) == I)
- cm = "Invalid Operation";
- else if((mxcsr & (Dm|D)) == D)
- cm = "Denormal Operand";
- else if((mxcsr & (Zm|Z)) == Z)
- cm = "Divide-By-Zero";
- else if((mxcsr & (Om|O)) == O)
- cm = "Numeric Overflow";
- else if((mxcsr & (Um|U)) == U)
- cm = "Numeric Underflow";
- else if((mxcsr & (Pm|P)) == P)
- cm = "Precision";
- else
- cm = "Unknown";
- snprint(up->genbuf, sizeof(up->genbuf),
- "sys: fp: %s Exception mxcsr=%#x", cm, mxcsr);
- return up->genbuf;
- }
- void
- fpuxf(Ureg *ureg, void *p)
- {
- Proc *up = externup();
- char *n;
- n = xfpuxf(ureg, p);
- if(n != nil)
- postnote(up, 1, n, NDebug);
- }
- char*
- acfpuxf(Ureg *ureg, void *p)
- {
- return xfpuxf(ureg, p);
- }
- static char*
- xfpumf(Ureg* ureg, void* v)
- {
- Proc *up = externup();
- Fxsave *fpusave;
- /*
- * #MF - x87 Floating Point Exception Pending (Vector 16).
- */
- /*
- * Save FPU state to check out the error.
- */
- fpusave = up->FPU.fpusave;
- _fxsave(fpusave);
- _stts();
- up->FPU.fpustate = Idle;
- if(ureg->ip & KZERO)
- panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip);
- /*
- * Notify the user process.
- * The path here is
- * call trap->fpumf->fpupostnote->postnote
- * return ->fpupostnote->fpumf->trap
- * call notify->fpunotify
- * return ->notify
- * then either
- * call pexit
- * or
- * return ->trap
- * return ->user note handler
- */
- return fpunote();
- }
- void
- fpumf(Ureg *ureg, void *p)
- {
- Proc *up = externup();
- char *n;
- n = xfpumf(ureg, p);
- if(n != nil)
- postnote(up, 1, n, NDebug);
- }
- char*
- acfpumf(Ureg *ureg, void *p)
- {
- return xfpumf(ureg, p);
- }
- static char*
- xfpunm(Ureg* ureg, void* v)
- {
- Proc *up = externup();
- Fxsave *fpusave;
- /*
- * #NM - Device Not Available (Vector 7).
- */
- if(up == nil)
- panic("#NM: fpu in kernel: ip %#p\n", ureg->ip);
- /*
- * Someone tried to use the FPU in a note handler.
- * That's a no-no.
- */
- if(up->FPU.fpustate & Hold)
- return "sys: floating point in note handler";
- if(ureg->ip & KZERO)
- panic("#NM: proc %d %s state %d ip %#p\n",
- up->pid, up->text, up->FPU.fpustate, ureg->ip);
- switch(up->FPU.fpustate){
- case Busy:
- default:
- panic("#NM: state %d ip %#p\n", up->FPU.fpustate, ureg->ip);
- break;
- case Init:
- /*
- * A process tries to use the FPU for the
- * first time and generates a 'device not available'
- * exception.
- * Turn the FPU on and initialise it for use.
- * Set the precision and mask the exceptions
- * we don't care about from the generic Mach value.
- */
- _clts();
- _fninit();
- _fwait();
- _fldcw(&machp()->FPU.fcw);
- _ldmxcsr(&machp()->FPU.mxcsr);
- up->FPU.fpusave = (void*)((PTR2UINT(up->FPU.fxsave) + 15) & ~15);
- up->FPU.fpustate = Busy;
- break;
- case Idle:
- /*
- * Before restoring the state, check for any pending
- * exceptions, there's no way to restore the state without
- * generating an unmasked exception.
- */
- fpusave = up->FPU.fpusave;
- if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I))
- return fpunote();
- /*
- * Sff is sticky.
- */
- fpusave->fcw &= ~Sff;
- _clts();
- _fxrstor(fpusave);
- up->FPU.fpustate = Busy;
- break;
- }
- return nil;
- }
- void
- fpunm(Ureg *ureg, void *p)
- {
- Proc *up = externup();
- char *n;
- n = xfpunm(ureg, p);
- if(n != nil)
- postnote(up, 1, n, NDebug);
- }
- char*
- acfpunm(Ureg *ureg, void *p)
- {
- return xfpunm(ureg, p);
- }
- void
- fpuinit(void)
- {
- uint64_t r;
- Fxsave *fxsave;
- uint8_t buf[sizeof(Fxsave)+15];
- /*
- * It's assumed there is an integrated FPU, so Em is cleared;
- */
- r = cr0get();
- r &= ~(Ts|Em);
- r |= Ne|Mp;
- cr0put(r);
- r = cr4get();
- r |= Osxmmexcpt|Osfxsr;
- cr4put(r);
- _fninit();
- fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15);
- memset(fxsave, 0, sizeof(Fxsave));
- _fxsave(fxsave);
- machp()->FPU.fcw = RCn|PCd|P|U|D;
- if(fxsave->mxcsrmask == 0)
- machp()->FPU.mxcsrmask = 0x0000FFBF;
- else
- machp()->FPU.mxcsrmask = fxsave->mxcsrmask;
- machp()->FPU.mxcsr = (Rn|Pm|Um|Dm) & machp()->FPU.mxcsrmask;
- _stts();
- if(machp()->machno != 0)
- return;
- /*
- * Set up the exception handlers.
- */
- trapenable(IdtNM, fpunm, 0, "#NM");
- trapenable(IdtMF, fpumf, 0, "#MF");
- trapenable(IdtXF, fpuxf, 0, "#XF");
- /* Same thing, for the AC */
- actrapenable(IdtNM, acfpunm, 0, "#NM");
- actrapenable(IdtMF, acfpumf, 0, "#MF");
- actrapenable(IdtXF, acfpuxf, 0, "#XF");
- }
|