|
@@ -12,7 +12,6 @@
|
|
|
#include <thread.h>
|
|
|
#include "threadimpl.h"
|
|
|
|
|
|
-/* first argument goes in a register; simplest just to ignore it */
|
|
|
static void
|
|
|
launcheramd64(void (*f)(void *arg), void *arg)
|
|
|
{
|
|
@@ -23,14 +22,15 @@ launcheramd64(void (*f)(void *arg), void *arg)
|
|
|
void
|
|
|
_threadinitstack(Thread *t, void (*f)(void*), void *arg)
|
|
|
{
|
|
|
- uint64_t *tos;
|
|
|
+ // Ensure tos is aligned to 16 bytes
|
|
|
+ uint64_t *tos = (uint64_t*)&t->stk[t->stksize&~0x0F];
|
|
|
|
|
|
- tos = (uint64_t*)&t->stk[t->stksize&~7];
|
|
|
- *--tos = (uint64_t)arg;
|
|
|
- *--tos = (uint64_t)f;
|
|
|
t->sched[JMPBUFPC] = (uint64_t)launcheramd64+JMPBUFDPC;
|
|
|
- //t->sched[JMPBUFSP] = (uint64_t)tos - 2*8; /* old PC and new PC */
|
|
|
- t->sched[JMPBUFSP] = (uint64_t)tos - 2*8; /* old PC and new PC */
|
|
|
- t->sched[JMPBUFARG1] = (uint64_t)f; /* old PC and new PC */
|
|
|
- t->sched[JMPBUFARG2] = (uint64_t)arg; /* old PC and new PC */
|
|
|
+ // Ensure we'll still be aligned to 16 bytes after return address is
|
|
|
+ // pushed onto the stack. Important because otherwise we'll get a GP
|
|
|
+ // exception if we try to use some SIMD instructions with a stack that's
|
|
|
+ // not aligned to 16 bytes.
|
|
|
+ t->sched[JMPBUFSP] = (uint64_t)tos - 8;
|
|
|
+ t->sched[JMPBUFARG1] = (uint64_t)f;
|
|
|
+ t->sched[JMPBUFARG2] = (uint64_t)arg;
|
|
|
}
|