syscall.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. #include "sys.h"
  16. #include <tos.h>
  17. #include "amd64.h"
  18. #include "ureg.h"
  19. extern int nosmp;
  20. typedef struct {
  21. uintptr_t ip;
  22. Ureg* arg0;
  23. char* arg1;
  24. char msg[ERRMAX];
  25. Ureg* old;
  26. Ureg ureg;
  27. } NFrame;
  28. /*
  29. * Return user to state before notify()
  30. */
  31. void
  32. noted(Ureg* cur, uintptr_t arg0)
  33. {
  34. Proc *up = externup();
  35. NFrame *nf;
  36. Note note;
  37. Ureg *nur;
  38. qlock(&up->debug);
  39. if(arg0 != NRSTR && !up->notified){
  40. qunlock(&up->debug);
  41. pprint("noted:suicide: call to noted when not notified\n");
  42. pexit("Suicide in noted", 0);
  43. }
  44. up->notified = 0;
  45. fpunoted();
  46. nf = up->ureg;
  47. /* sanity clause */
  48. if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){
  49. qunlock(&up->debug);
  50. pprint("noted:suicide: bad ureg %#p in noted\n", nf);
  51. pexit("Suicide in noted", 0);
  52. }
  53. /*
  54. * Check the segment selectors are all valid.
  55. */
  56. nur = &nf->ureg;
  57. if(nur->cs != SSEL(SiUCS, SsRPL3) || nur->ss != SSEL(SiUDS, SsRPL3)) {
  58. qunlock(&up->debug);
  59. pprint("noted: suicide: bad segment selector (cs %p want %p, ss %p want %p), in noted\n",
  60. nur->cs, SSEL(SiUCS, SsRPL3),
  61. nur->ss, SSEL(SiUDS, SsRPL3)
  62. );
  63. pexit("Suicide in noted", 0);
  64. }
  65. /* don't let user change system flags */
  66. nur->flags &= (Of|Df|Sf|Zf|Af|Pf|Cf);
  67. nur->flags |= cur->flags & ~(Of|Df|Sf|Zf|Af|Pf|Cf);
  68. memmove(cur, nur, sizeof(Ureg));
  69. switch((int)arg0){
  70. case NCONT:
  71. case NRSTR:
  72. if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){
  73. qunlock(&up->debug);
  74. pprint("suicide: trap in noted pc=%#p sp=%#p\n",
  75. nur->ip, nur->sp);
  76. pexit("Suicide", 0);
  77. }
  78. up->ureg = nf->old;
  79. qunlock(&up->debug);
  80. break;
  81. case NSAVE:
  82. if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){
  83. qunlock(&up->debug);
  84. pprint("suicide: trap in noted pc=%#p sp=%#p\n",
  85. nur->ip, nur->sp);
  86. pexit("Suicide", 0);
  87. }
  88. qunlock(&up->debug);
  89. splhi();
  90. nf->arg1 = nf->msg;
  91. nf->arg0 = &nf->ureg;
  92. cur->bp = PTR2UINT(nf->arg0);
  93. // nf->ip = 0;
  94. cur->di = (uint64_t) nf->arg0;
  95. cur->si = (uint64_t) nf->arg1;
  96. cur->sp = PTR2UINT(nf);
  97. break;
  98. default:
  99. memmove(&note, &up->lastnote, sizeof(Note));
  100. qunlock(&up->debug);
  101. pprint("suicide: bad arg %#p in noted: %s\n", arg0, note.msg);
  102. pexit(note.msg, 0);
  103. break;
  104. case NDFLT:
  105. memmove(&note, &up->lastnote, sizeof(Note));
  106. qunlock(&up->debug);
  107. if(note.flag == NDebug)
  108. pprint("suicide: %s\n", note.msg);
  109. pexit(note.msg, note.flag != NDebug);
  110. break;
  111. }
  112. }
  113. /*
  114. * Call user, if necessary, with note.
  115. * Pass user the Ureg struct and the note on his stack.
  116. */
  117. int
  118. notify(Ureg* ureg)
  119. {
  120. Proc *up = externup();
  121. int l;
  122. Mpl pl;
  123. Note note;
  124. uintptr_t sp;
  125. NFrame *nf;
  126. /*
  127. * Calls procctl splhi, see comment in procctl for the reasoning.
  128. */
  129. if(up->procctl)
  130. procctl(up);
  131. if(up->nnote == 0)
  132. return 0;
  133. fpunotify(ureg);
  134. pl = spllo();
  135. qlock(&up->debug);
  136. up->notepending = 0;
  137. memmove(&note, &up->note[0], sizeof(Note));
  138. if(strncmp(note.msg, "sys:", 4) == 0){
  139. l = strlen(note.msg);
  140. if(l > ERRMAX-sizeof(" pc=0x0123456789abcdef"))
  141. l = ERRMAX-sizeof(" pc=0x0123456789abcdef");
  142. sprint(note.msg+l, " pc=%#p", ureg->ip);
  143. }
  144. if(note.flag != NUser && (up->notified || up->notify == nil)){
  145. qunlock(&up->debug);
  146. if(note.flag == NDebug)
  147. pprint("suicide: %s\n", note.msg);
  148. pexit(note.msg, note.flag != NDebug);
  149. }
  150. if(up->notified){
  151. qunlock(&up->debug);
  152. splhi();
  153. return 0;
  154. }
  155. if(up->notify == nil){
  156. qunlock(&up->debug);
  157. pexit(note.msg, note.flag != NDebug);
  158. }
  159. if(!okaddr(PTR2UINT(up->notify), sizeof(ureg->ip), 0)){
  160. qunlock(&up->debug);
  161. pprint("suicide: bad function address %#p in notify\n",
  162. up->notify);
  163. pexit("Suicide", 0);
  164. }
  165. sp = ureg->sp - ROUNDUP(sizeof(NFrame), 16) - 128; // amd64 red zone, also wanted by go stack traces
  166. if(!okaddr(sp, sizeof(NFrame), 1)){
  167. qunlock(&up->debug);
  168. pprint("suicide: bad stack address %#p in notify\n", sp);
  169. pexit("Suicide", 0);
  170. }
  171. nf = UINT2PTR(sp);
  172. memmove(&nf->ureg, ureg, sizeof(Ureg));
  173. nf->old = up->ureg;
  174. up->ureg = nf; /* actually the NFrame, for noted */
  175. memmove(nf->msg, note.msg, ERRMAX);
  176. nf->arg1 = nf->msg;
  177. nf->arg0 = &nf->ureg;
  178. ureg->di = (uintptr)nf->arg0;
  179. ureg->si = (uintptr)nf->arg1;
  180. //print("Setting di to %p and si to %p\n", ureg->di, ureg->si);
  181. ureg->bp = PTR2UINT(nf->arg0);
  182. nf->ip = 0;
  183. ureg->sp = sp;
  184. ureg->ip = PTR2UINT(up->notify);
  185. up->notified = 1;
  186. up->nnote--;
  187. memmove(&up->lastnote, &note, sizeof(Note));
  188. memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
  189. qunlock(&up->debug);
  190. splx(pl);
  191. return 1;
  192. }
  193. void
  194. noerrorsleft(void)
  195. {
  196. Proc *up = externup();
  197. int i;
  198. if(up->nerrlab){
  199. /* NIX processes will have a waserror in their handler */
  200. if(up->ac != nil && up->nerrlab == 1)
  201. return;
  202. print("bad errstack: %d extra\n", up->nerrlab);
  203. for(i = 0; i < NERR; i++)
  204. print("sp=%#p pc=%#p\n",
  205. up->errlab[i].sp, up->errlab[i].pc);
  206. panic("error stack");
  207. }
  208. }
  209. int printallsyscalls;
  210. void
  211. syscall(unsigned int scallnr, Ureg *ureg)
  212. {
  213. // can only handle 6 args right now.
  214. uintptr_t a0, a1, a2, a3;
  215. uintptr_t a4, a5;
  216. a0 = ureg->di;
  217. a1 = ureg->si;
  218. a2 = ureg->dx;
  219. a3 = ureg->r10;
  220. a4 = ureg->r8;
  221. a5 = ureg->r9;
  222. Proc *up = externup();
  223. if (0) iprint("Syscall %d, %lx, %lx, %lx %lx %lx %lx\n", scallnr, a0, a1, a2, a3, a4, a5);
  224. char *e;
  225. uintptr_t sp;
  226. int s;
  227. int64_t startns, stopns;
  228. Ar0 ar0;
  229. static Ar0 zar0;
  230. if(!userureg(ureg))
  231. panic("syscall: cs %#llx\n", ureg->cs);
  232. cycles(&up->kentry);
  233. machp()->syscall++;
  234. up->nsyscall++;
  235. up->nqsyscall++;
  236. up->insyscall = 1;
  237. up->pc = ureg->ip;
  238. up->dbgreg = ureg;
  239. sp = ureg->sp;
  240. startns = stopns = 0;
  241. if (0) hi("so far syscall!\n");
  242. if (up->pid == 0 || printallsyscalls) {
  243. syscallfmt('E', scallnr, nil, startns, stopns, a0, a1, a2, a3, a4, a5);
  244. if(up->syscalltrace) {
  245. print("E %s\n", up->syscalltrace);
  246. free(up->syscalltrace);
  247. up->syscalltrace = nil;
  248. }
  249. }
  250. if(up->procctl == Proc_tracesyscall){
  251. /*
  252. * Redundant validaddr. Do we care?
  253. * Tracing syscalls is not exactly a fast path...
  254. * Beware, validaddr currently does a pexit rather
  255. * than an error if there's a problem; that might
  256. * change in the future.
  257. */
  258. if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE))
  259. validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0);
  260. syscallfmt('E', scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
  261. up->procctl = Proc_stopme;
  262. procctl(up);
  263. if(up->syscalltrace)
  264. free(up->syscalltrace);
  265. up->syscalltrace = nil;
  266. startns = todget(nil);
  267. }
  268. if(up->strace_on) {
  269. /*
  270. * Redundant validaddr. Do we care?
  271. * Tracing syscalls is not exactly a fast path...
  272. * Beware, validaddr currently does a pexit rather
  273. * than an error if there's a problem; that might
  274. * change in the future.
  275. */
  276. if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE))
  277. validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0);
  278. syscallfmt('E', scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
  279. // TODO: make this all use blocks so we have zero copy.
  280. // We can do that when we retire the old system call tracing stuff.
  281. qwrite(up->strace->q, up->syscalltrace, strlen(up->syscalltrace));
  282. free(up->syscalltrace);
  283. up->syscalltrace = nil;
  284. startns = todget(nil);
  285. }
  286. if (0) hi("more syscall!\n");
  287. up->scallnr = scallnr;
  288. if(scallnr == RFORK)
  289. fpusysrfork(ureg);
  290. spllo();
  291. sp = ureg->sp;
  292. up->nerrlab = 0;
  293. ar0 = zar0;
  294. if(!waserror()){
  295. if(scallnr >= nsyscall || systab[scallnr].f == nil){
  296. pprint("bad sys call number %d pc %#llx\n",
  297. scallnr, ureg->ip);
  298. postnote(up, 1, "sys: bad sys call", NDebug);
  299. error(Ebadarg);
  300. }
  301. if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE))
  302. validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0);
  303. memmove(up->arg, UINT2PTR(sp+BY2SE), sizeof(up->arg));
  304. up->psstate = systab[scallnr].n;
  305. if (0) hi("call syscall!\n");
  306. systab[scallnr].f(&ar0, a0, a1, a2, a3, a4, a5);
  307. if (0) hi("it returned!\n");
  308. poperror();
  309. }
  310. else{
  311. /* failure: save the error buffer for errstr */
  312. e = up->syserrstr;
  313. up->syserrstr = up->errstr;
  314. up->errstr = e;
  315. if(DBGFLG && up->pid == 1)
  316. iprint("%s: syscall %s error %s\n",
  317. up->text, systab[scallnr].n, up->syserrstr);
  318. ar0 = systab[scallnr].r;
  319. }
  320. /*
  321. * NIX: for the execac() syscall, what follows is done within
  322. * the system call, because it never returns.
  323. * See acore.c:/^retfromsyscall
  324. */
  325. noerrorsleft();
  326. /*
  327. * Put return value in frame.
  328. */
  329. ureg->ax = ar0.p;
  330. if (up->pid == 0 || printallsyscalls) {
  331. stopns = todget(nil);
  332. syscallfmt('X', scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
  333. if(up->syscalltrace) {
  334. print("X %s\n", up->syscalltrace);
  335. free(up->syscalltrace);
  336. up->syscalltrace = nil;
  337. }
  338. }
  339. if(up->strace_on) {
  340. uint8_t what = 'X';
  341. stopns = todget(nil);
  342. if (scallnr == RFORK && a0 & RFPROC && ar0.i > 0)
  343. what = 'F';
  344. syscallfmt(what, scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
  345. qwrite(up->strace->q, up->syscalltrace, strlen(up->syscalltrace));
  346. free(up->syscalltrace);
  347. up->syscalltrace = nil;
  348. }
  349. if(up->procctl == Proc_tracesyscall){
  350. uint8_t what = 'X';
  351. stopns = todget(nil);
  352. up->procctl = Proc_stopme;
  353. if (scallnr == RFORK && a0 & RFPROC && ar0.i > 0)
  354. what = 'F';
  355. syscallfmt(what, scallnr, &ar0, startns, stopns, a0, a1, a2, a3, a4, a5);
  356. s = splhi();
  357. procctl(up);
  358. splx(s);
  359. if(up->syscalltrace)
  360. free(up->syscalltrace);
  361. up->syscalltrace = nil;
  362. }else if(up->procctl == Proc_totc || up->procctl == Proc_toac)
  363. procctl(up);
  364. if (0) hi("past sysretfmt\n");
  365. up->insyscall = 0;
  366. up->psstate = 0;
  367. if(scallnr == NOTED)
  368. noted(ureg, a0);
  369. if (0) hi("now to splhi\n");
  370. splhi();
  371. if(scallnr != RFORK && (up->procctl || up->nnote))
  372. notify(ureg);
  373. /* if we delayed sched because we held a lock, sched now */
  374. if(up->delaysched){
  375. sched();
  376. splhi();
  377. }
  378. kexit(ureg);
  379. if (0) hi("done kexit\n");
  380. }
  /*
   * Given a current bottom-of-stack and a count of pointer arguments
   * to be pushed onto it followed by an integer argument count, return
   * a suitably aligned new bottom-of-stack which will satisfy any
   * hardware stack-alignment constraints.
   *
   * STACKALIGN does the basic rounding.  On top of that the stack has
   * to come out 16-byte aligned, not 8, once
   *	nil
   *	argv
   *	argc
   * have been pushed.  So an odd argc needs a starting point that is an
   * odd multiple of 8 and an even argc needs an even one; when the
   * parity of the aligned address is not the one wanted, drop down by
   * one more 8-byte slot.
   */
  uintptr_t
  sysexecstack(uintptr_t stack, int argc)
  {
      uintptr_t sp;
      int wantodd, isodd;

      USED(argc);

      sp = STACKALIGN(stack);
      wantodd = (argc & 1) != 0;
      isodd = (sp & 8) != 0;
      if(wantodd != isodd)
          sp -= 8;

      return sp;
  }
  413. void*
  414. sysexecregs(uintptr_t entry, uint32_t ssize, void *tos)
  415. {
  416. Proc *up = externup();
  417. uintptr_t *sp;
  418. Ureg *ureg;
  419. // We made sure it was correctly aligned in sysexecstack, above.
  420. if (ssize & 0xf) {
  421. print("your stack is wrong: stacksize is not 16-byte aligned: %d\n", ssize);
  422. panic("misaligned stack in sysexecregs");
  423. }
  424. sp = (uintptr_t*)(USTKTOP - ssize);
  425. ureg = up->dbgreg;
  426. ureg->sp = PTR2UINT(sp);
  427. ureg->ip = entry;
  428. ureg->type = 64; /* fiction for acid */
  429. ureg->dx = (uintptr_t)tos;
  430. /*
  431. * return the address of kernel/user shared data
  432. * (e.g. clock stuff)
  433. */
  434. return UINT2PTR(USTKTOP-sizeof(Tos));
  435. }
  436. void
  437. sysprocsetup(Proc* p)
  438. {
  439. fpusysprocsetup(p);
  440. }
  441. void
  442. sysrforkchild(Proc* child, Proc* parent)
  443. {
  444. Ureg *cureg;
  445. // If STACKPAD is 1 things go very bad very quickly.
  446. // But it is the right value ...
  447. #define STACKPAD 1 /* for return PC? */
  448. /*
  449. * Add STACKPAD*BY2SE to the stack to account for
  450. * - the return PC
  451. * (NOT NOW) - trap's arguments (syscallnr, ureg)
  452. */
  453. child->sched.sp = PTR2UINT(child->kstack+KSTACK-((sizeof(Ureg)+STACKPAD*BY2SE)));
  454. child->sched.pc = PTR2UINT(sysrforkret);
  455. cureg = (Ureg*)(child->sched.sp+STACKPAD*BY2SE);
  456. memmove(cureg, parent->dbgreg, sizeof(Ureg));
  457. /* Things from bottom of syscall which were never executed */
  458. child->psstate = 0;
  459. child->insyscall = 0;
  460. //iprint("Child SP set tp %p\n", (void *)child->sched.sp);
  461. fpusysrforkchild(child, parent);
  462. }