sysproc.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "tos.h"
  11. #include "../port/lib.h"
  12. #include "mem.h"
  13. #include "dat.h"
  14. #include "fns.h"
  15. #include "../port/error.h"
  16. #include "../port/edf.h"
  17. #include <a.out.h>
  18. #include <trace.h>
  19. void
  20. sysrfork(Ar0* ar0, va_list list)
  21. {
  22. Proc *p;
  23. int flag, i, n, pid;
  24. Fgrp *ofg;
  25. Pgrp *opg;
  26. Rgrp *org;
  27. Egrp *oeg;
  28. Mach *wm;
  29. /*
  30. * int rfork(int);
  31. */
  32. flag = va_arg(list, int);
  33. /* Check flags before we commit */
  34. if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  35. error(Ebadarg);
  36. if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
  37. error(Ebadarg);
  38. if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
  39. error(Ebadarg);
  40. if((flag & (RFPREPAGE|RFCPREPAGE)) == (RFPREPAGE|RFCPREPAGE))
  41. error(Ebadarg);
  42. if((flag & (RFCORE|RFCCORE)) == (RFCORE|RFCCORE))
  43. error(Ebadarg);
  44. if(flag & RFCORE && up->wired != nil)
  45. error("wired proc cannot move to ac");
  46. if((flag&RFPROC) == 0) {
  47. if(flag & (RFMEM|RFNOWAIT))
  48. error(Ebadarg);
  49. if(flag & (RFFDG|RFCFDG)) {
  50. ofg = up->fgrp;
  51. if(flag & RFFDG)
  52. up->fgrp = dupfgrp(ofg);
  53. else
  54. up->fgrp = dupfgrp(nil);
  55. closefgrp(ofg);
  56. }
  57. if(flag & (RFNAMEG|RFCNAMEG)) {
  58. opg = up->pgrp;
  59. up->pgrp = newpgrp();
  60. if(flag & RFNAMEG)
  61. pgrpcpy(up->pgrp, opg);
  62. /* inherit noattach */
  63. up->pgrp->noattach = opg->noattach;
  64. closepgrp(opg);
  65. }
  66. if(flag & RFNOMNT)
  67. up->pgrp->noattach = 1;
  68. if(flag & RFREND) {
  69. org = up->rgrp;
  70. up->rgrp = newrgrp();
  71. closergrp(org);
  72. }
  73. if(flag & (RFENVG|RFCENVG)) {
  74. oeg = up->egrp;
  75. up->egrp = smalloc(sizeof(Egrp));
  76. up->egrp->ref = 1;
  77. if(flag & RFENVG)
  78. envcpy(up->egrp, oeg);
  79. closeegrp(oeg);
  80. }
  81. if(flag & RFNOTEG)
  82. up->noteid = incref(&noteidalloc);
  83. if(flag & (RFPREPAGE|RFCPREPAGE)){
  84. up->prepagemem = flag&RFPREPAGE;
  85. nixprepage(-1);
  86. }
  87. if(flag & RFCORE){
  88. up->ac = getac(up, -1);
  89. up->procctl = Proc_toac;
  90. }else if(flag & RFCCORE){
  91. if(up->ac != nil)
  92. up->procctl = Proc_totc;
  93. }
  94. ar0->i = 0;
  95. return;
  96. }
  97. p = newproc();
  98. if(flag & RFCORE){
  99. if(!waserror()){
  100. p->ac = getac(p, -1);
  101. p->procctl = Proc_toac;
  102. poperror();
  103. }else{
  104. print("warning: rfork: no available ac for the child, it runs in the tc\n");
  105. p->procctl = 0;
  106. }
  107. }
  108. if(up->trace)
  109. p->trace = 1;
  110. p->scallnr = up->scallnr;
  111. memmove(p->arg, up->arg, sizeof(up->arg));
  112. p->nerrlab = 0;
  113. p->slash = up->slash;
  114. p->dot = up->dot;
  115. incref(p->dot);
  116. memmove(p->note, up->note, sizeof(p->note));
  117. p->privatemem = up->privatemem;
  118. p->noswap = up->noswap;
  119. p->nnote = up->nnote;
  120. p->notified = 0;
  121. p->lastnote = up->lastnote;
  122. p->notify = up->notify;
  123. p->ureg = up->ureg;
  124. p->prepagemem = up->prepagemem;
  125. p->dbgreg = 0;
  126. /* Make a new set of memory segments */
  127. n = flag & RFMEM;
  128. qlock(&p->seglock);
  129. if(waserror()){
  130. qunlock(&p->seglock);
  131. nexterror();
  132. }
  133. for(i = 0; i < NSEG; i++)
  134. if(up->seg[i])
  135. p->seg[i] = dupseg(up->seg, i, n);
  136. qunlock(&p->seglock);
  137. poperror();
  138. /* File descriptors */
  139. if(flag & (RFFDG|RFCFDG)) {
  140. if(flag & RFFDG)
  141. p->fgrp = dupfgrp(up->fgrp);
  142. else
  143. p->fgrp = dupfgrp(nil);
  144. }
  145. else {
  146. p->fgrp = up->fgrp;
  147. incref(p->fgrp);
  148. }
  149. /* Process groups */
  150. if(flag & (RFNAMEG|RFCNAMEG)) {
  151. p->pgrp = newpgrp();
  152. if(flag & RFNAMEG)
  153. pgrpcpy(p->pgrp, up->pgrp);
  154. /* inherit noattach */
  155. p->pgrp->noattach = up->pgrp->noattach;
  156. }
  157. else {
  158. p->pgrp = up->pgrp;
  159. incref(p->pgrp);
  160. }
  161. if(flag & RFNOMNT)
  162. up->pgrp->noattach = 1;
  163. if(flag & RFREND)
  164. p->rgrp = newrgrp();
  165. else {
  166. incref(up->rgrp);
  167. p->rgrp = up->rgrp;
  168. }
  169. /* Environment group */
  170. if(flag & (RFENVG|RFCENVG)) {
  171. p->egrp = smalloc(sizeof(Egrp));
  172. p->egrp->ref = 1;
  173. if(flag & RFENVG)
  174. envcpy(p->egrp, up->egrp);
  175. }
  176. else {
  177. p->egrp = up->egrp;
  178. incref(p->egrp);
  179. }
  180. p->hang = up->hang;
  181. p->procmode = up->procmode;
  182. /* Craft a return frame which will cause the child to pop out of
  183. * the scheduler in user mode with the return register zero
  184. */
  185. sysrforkchild(p, up);
  186. p->parent = up;
  187. p->parentpid = up->pid;
  188. if(flag&RFNOWAIT)
  189. p->parentpid = 0;
  190. else {
  191. lock(&up->exl);
  192. up->nchild++;
  193. unlock(&up->exl);
  194. }
  195. if((flag&RFNOTEG) == 0)
  196. p->noteid = up->noteid;
  197. pid = p->pid;
  198. memset(p->time, 0, sizeof(p->time));
  199. p->time[TReal] = sys->ticks;
  200. if(flag & (RFPREPAGE|RFCPREPAGE)){
  201. p->prepagemem = flag&RFPREPAGE;
  202. /*
  203. * BUG: this is prepaging our memory, not
  204. * that of the child, but at least we
  205. * will do the copy on write.
  206. */
  207. nixprepage(-1);
  208. }
  209. kstrdup(&p->text, up->text);
  210. kstrdup(&p->user, up->user);
  211. /*
  212. * since the bss/data segments are now shareable,
  213. * any mmu info about this process is now stale
  214. * (i.e. has bad properties) and has to be discarded.
  215. */
  216. mmuflush();
  217. p->basepri = up->basepri;
  218. p->priority = up->basepri;
  219. p->fixedpri = up->fixedpri;
  220. p->mp = up->mp;
  221. wm = up->wired;
  222. if(wm)
  223. procwired(p, wm->machno);
  224. p->color = up->color;
  225. ready(p);
  226. sched();
  227. ar0->i = pid;
  228. }
  /*
   * vl2be - interpret the eight bytes of v, in memory order, as a
   * big-endian 64-bit quantity and return its value.
   *
   * Each byte is widened to uint64_t before it is shifted, so a byte
   * with its top bit set can neither overflow a signed int nor be
   * sign-extended into the upper half of the result.
   */
  static uint64_t
  vl2be(uint64_t v)
  {
  	uint8_t *p;
  	uint64_t hi, lo;

  	p = (uint8_t*)&v;
  	hi = ((uint64_t)p[0]<<24) | ((uint64_t)p[1]<<16)
  		| ((uint64_t)p[2]<<8) | (uint64_t)p[3];
  	lo = ((uint64_t)p[4]<<24) | ((uint64_t)p[5]<<16)
  		| ((uint64_t)p[6]<<8) | (uint64_t)p[7];
  	return (hi<<32) | lo;
  }
  /*
   * l2be - interpret the four bytes of l, in memory order, as a
   * big-endian 32-bit quantity and return its value.
   *
   * The bytes are widened to uint32_t before shifting so that a first
   * byte >= 0x80 is not shifted into the sign bit of a promoted int.
   */
  static uint32_t
  l2be(int32_t l)
  {
  	uint8_t *cp;

  	cp = (uint8_t*)&l;
  	return ((uint32_t)cp[0]<<24) | ((uint32_t)cp[1]<<16)
  		| ((uint32_t)cp[2]<<8) | (uint32_t)cp[3];
  }
  244. typedef struct {
  245. Exec;
  246. uvlong hdr[1];
  247. } Hdr;
  248. /*
  249. * flags can ONLY specify that you want an AC for you, or
  250. * that you want an XC for you.
  251. *
  252. */
  253. static void
  254. execac(Ar0* ar0, int flags, char *ufile, char **argv)
  255. {
  256. Hdr hdr;
  257. Fgrp *f;
  258. Tos *tos;
  259. Chan *chan, *ichan;
  260. Image *img;
  261. Segment *s;
  262. int argc, i, n;
  263. char *a, *elem, *file, *p;
  264. char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1];
  265. int32_t hdrsz, magic, textsz, datasz, bsssz;
  266. uintptr textlim, datalim, bsslim, entry, stack;
  267. static int colorgen;
  268. file = nil;
  269. elem = nil;
  270. switch(flags){
  271. case EXTC:
  272. case EXXC:
  273. break;
  274. case EXAC:
  275. up->ac = getac(up, -1);
  276. break;
  277. default:
  278. error("unknown execac flag");
  279. }
  280. if(waserror()){
  281. DBG("execac: failing: %s\n", up->errstr);
  282. free(file);
  283. free(elem);
  284. if(flags == EXAC && up->ac != nil)
  285. up->ac->proc = nil;
  286. up->ac = nil;
  287. nexterror();
  288. }
  289. /*
  290. * Open the file, remembering the final element and the full name.
  291. */
  292. argc = 0;
  293. file = validnamedup(ufile, 1);
  294. DBG("execac: up %#p file %s\n", up, file);
  295. if(up->trace)
  296. proctracepid(up);
  297. ichan = namec(file, Aopen, OEXEC, 0);
  298. if(waserror()){
  299. cclose(ichan);
  300. nexterror();
  301. }
  302. kstrdup(&elem, up->genbuf);
  303. /*
  304. * Read the header.
  305. * If it's a #!, fill in progarg[] with info then read a new header
  306. * from the file indicated by the #!.
  307. * The #! line must be less than sizeof(Exec) in size,
  308. * including the terminating \n.
  309. */
  310. hdrsz = ichan->dev->read(ichan, &hdr, sizeof(Hdr), 0);
  311. if(hdrsz < 2)
  312. error(Ebadexec);
  313. p = (char*)&hdr;
  314. if(p[0] == '#' && p[1] == '!'){
  315. p = memccpy(line, (char*)&hdr, '\n',
  316. MIN(sizeof(Exec), hdrsz));
  317. if(p == nil)
  318. error(Ebadexec);
  319. *(p-1) = '\0';
  320. argc = tokenize(line+2, progarg, nelem(progarg));
  321. if(argc == 0)
  322. error(Ebadexec);
  323. /* The original file becomes an extra arg after #! line */
  324. progarg[argc++] = file;
  325. /*
  326. * Take the #! $0 as a file to open, and replace
  327. * $0 with the original path's name.
  328. */
  329. p = progarg[0];
  330. progarg[0] = elem;
  331. chan = nil; /* in case namec errors out */
  332. USED(chan);
  333. chan = namec(p, Aopen, OEXEC, 0);
  334. hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0);
  335. if(hdrsz < 2)
  336. error(Ebadexec);
  337. }else{
  338. chan = ichan;
  339. incref(ichan);
  340. }
  341. /* chan is the chan to use, initial or not. ichan is irrelevant now */
  342. cclose(ichan);
  343. poperror();
  344. /*
  345. * #! has had its chance, now we need a real binary.
  346. */
  347. magic = l2be(hdr.magic);
  348. if(hdrsz != sizeof(Hdr) || magic != AOUT_MAGIC)
  349. error(Ebadexec);
  350. if(magic & HDR_MAGIC){
  351. entry = vl2be(hdr.hdr[0]);
  352. hdrsz = sizeof(Hdr);
  353. }
  354. else{
  355. entry = l2be(hdr.entry);
  356. hdrsz = sizeof(Exec);
  357. }
  358. textsz = l2be(hdr.text);
  359. datasz = l2be(hdr.data);
  360. bsssz = l2be(hdr.bss);
  361. textlim = UTROUND(UTZERO+hdrsz+textsz);
  362. datalim = BIGPGROUND(textlim+datasz);
  363. bsslim = BIGPGROUND(textlim+datasz+bsssz);
  364. /*
  365. * Check the binary header for consistency,
  366. * e.g. the entry point is within the text segment and
  367. * the segments don't overlap each other.
  368. */
  369. if(entry < UTZERO+hdrsz || entry >= UTZERO+hdrsz+textsz)
  370. error(Ebadexec);
  371. if(textsz >= textlim || datasz > datalim || bsssz > bsslim
  372. || textlim >= USTKTOP || datalim >= USTKTOP || bsslim >= USTKTOP
  373. || datalim < textlim || bsslim < datalim)
  374. error(Ebadexec);
  375. if(up->ac != nil && up->ac != m)
  376. up->color = corecolor(up->ac->machno);
  377. else
  378. up->color = corecolor(m->machno);
  379. /*
  380. * The new stack is created in ESEG, temporarily mapped elsewhere.
  381. * The stack contains, in descending address order:
  382. * a structure containing housekeeping and profiling data (Tos);
  383. * argument strings;
  384. * array of vectors to the argument strings with a terminating
  385. * nil (argv).
  386. * When the exec is committed, this temporary stack in ESEG will
  387. * become SSEG.
  388. * The architecture-dependent code which jumps to the new image
  389. * will also push a count of the argument array onto the stack (argc).
  390. */
  391. qlock(&up->seglock);
  392. if(waserror()){
  393. if(up->seg[ESEG] != nil){
  394. putseg(up->seg[ESEG]);
  395. up->seg[ESEG] = nil;
  396. }
  397. qunlock(&up->seglock);
  398. nexterror();
  399. }
  400. up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ);
  401. up->seg[ESEG]->color = up->color;
  402. /*
  403. * Stack is a pointer into the temporary stack
  404. * segment, and will move as items are pushed.
  405. */
  406. stack = TSTKTOP-sizeof(Tos);
  407. /*
  408. * First, the top-of-stack structure.
  409. */
  410. tos = (Tos*)stack;
  411. tos->cyclefreq = m->cyclefreq;
  412. cycles((uint64_t*)&tos->pcycles);
  413. tos->pcycles = -tos->pcycles;
  414. tos->kcycles = tos->pcycles;
  415. tos->clock = 0;
  416. /*
  417. * Next push any arguments found from a #! header.
  418. */
  419. for(i = 0; i < argc; i++){
  420. n = strlen(progarg[i])+1;
  421. stack -= n;
  422. memmove(UINT2PTR(stack), progarg[i], n);
  423. }
  424. /*
  425. * Copy the strings pointed to by the syscall argument argv into
  426. * the temporary stack segment, being careful to check
  427. * the strings argv points to are valid.
  428. */
  429. for(i = 0;; i++, argv++){
  430. a = *(char**)validaddr(argv, sizeof(char**), 0);
  431. if(a == nil)
  432. break;
  433. a = validaddr(a, 1, 0);
  434. n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1;
  435. /*
  436. * This futzing is so argv[0] gets validated even
  437. * though it will be thrown away if this is a shell
  438. * script.
  439. */
  440. if(argc > 0 && i == 0)
  441. continue;
  442. /*
  443. * Before copying the string into the temporary stack,
  444. * which might involve a demand-page, check the string
  445. * will not overflow the bottom of the stack.
  446. */
  447. stack -= n;
  448. if(stack < TSTKTOP-USTKSIZE)
  449. error(Enovmem);
  450. p = UINT2PTR(stack);
  451. memmove(p, a, n);
  452. p[n-1] = 0;
  453. argc++;
  454. }
  455. if(argc < 1)
  456. error(Ebadexec);
  457. /*
  458. * Before pushing the argument pointers onto the temporary stack,
  459. * which might involve a demand-page, check there is room for the
  460. * terminating nil pointer, plus pointers, plus some slop for however
  461. * argc might be passed on the stack by sysexecregs (give a page
  462. * of slop, it is an overestimate, but why not).
  463. * Sysexecstack does any architecture-dependent stack alignment.
  464. * Keep a copy of the start of the argument strings before alignment
  465. * so up->args can be created later.
  466. * Although the argument vectors are being pushed onto the stack in
  467. * the temporary segment, the values must be adjusted to reflect
  468. * the segment address after it replaces the current SSEG.
  469. */
  470. a = p = UINT2PTR(stack);
  471. stack = sysexecstack(stack, argc);
  472. if(stack-(argc+1)*sizeof(char**)-BIGPGSZ < TSTKTOP-USTKSIZE)
  473. error(Ebadexec);
  474. argv = (char**)stack;
  475. *--argv = nil;
  476. for(i = 0; i < argc; i++){
  477. *--argv = p + (USTKTOP-TSTKTOP);
  478. p += strlen(p) + 1;
  479. }
  480. /*
  481. * Make a good faith copy of the args in up->args using the strings
  482. * in the temporary stack segment. The length must be > 0 as it
  483. * includes the \0 on the last argument and argc was checked earlier
  484. * to be > 0. After the memmove, compensate for any UTF character
  485. * boundary before placing the terminating \0.
  486. */
  487. n = p - a;
  488. if(n <= 0)
  489. error(Egreg);
  490. if(n > 128)
  491. n = 128;
  492. p = smalloc(n);
  493. if(waserror()){
  494. free(p);
  495. nexterror();
  496. }
  497. memmove(p, a, n);
  498. while(n > 0 && (p[n-1] & 0xc0) == 0x80)
  499. n--;
  500. p[n-1] = '\0';
  501. /*
  502. * All the argument processing is now done, ready to commit.
  503. */
  504. free(up->text);
  505. up->text = elem;
  506. elem = nil;
  507. free(up->args);
  508. up->args = p;
  509. up->nargs = n;
  510. poperror(); /* p (up->args) */
  511. /*
  512. * Close on exec
  513. */
  514. f = up->fgrp;
  515. for(i=0; i<=f->maxfd; i++)
  516. fdclose(i, CCEXEC);
  517. /*
  518. * Free old memory.
  519. * Special segments maintained across exec.
  520. */
  521. for(i = SSEG; i <= HSEG; i++) {
  522. putseg(up->seg[i]);
  523. up->seg[i] = nil; /* in case of error */
  524. }
  525. for(i = HSEG+1; i< NSEG; i++) {
  526. s = up->seg[i];
  527. if(s && (s->type&SG_CEXEC)) {
  528. putseg(s);
  529. up->seg[i] = nil;
  530. }
  531. }
  532. /* Text. Shared. Attaches to cache image if possible
  533. * but prepaged if EXAC
  534. */
  535. img = attachimage(SG_TEXT|SG_RONLY, chan, up->color, UTZERO, (textlim-UTZERO)/BIGPGSZ);
  536. s = img->s;
  537. up->seg[TSEG] = s;
  538. s->flushme = 1;
  539. s->fstart = 0;
  540. s->flen = hdrsz+textsz;
  541. if(img->color != up->color){
  542. up->color = img->color;
  543. }
  544. unlock(img);
  545. /* Data. Shared. */
  546. s = newseg(SG_DATA, textlim, (datalim-textlim)/BIGPGSZ);
  547. up->seg[DSEG] = s;
  548. s->color = up->color;
  549. /* Attached by hand */
  550. incref(img);
  551. s->image = img;
  552. s->fstart = hdrsz+textsz;
  553. s->flen = datasz;
  554. /* BSS. Zero fill on demand for TS */
  555. up->seg[BSEG] = newseg(SG_BSS, datalim, (bsslim-datalim)/BIGPGSZ);
  556. up->seg[BSEG]->color= up->color;
  557. /*
  558. * Move the stack
  559. */
  560. s = up->seg[ESEG];
  561. up->seg[ESEG] = nil;
  562. up->seg[SSEG] = s;
  563. /* the color of the stack was decided when we created it before,
  564. * it may have nothing to do with the color of other segments.
  565. */
  566. qunlock(&up->seglock);
  567. poperror(); /* seglock */
  568. s->base = USTKTOP-USTKSIZE;
  569. s->top = USTKTOP;
  570. relocateseg(s, USTKTOP-TSTKTOP);
  571. /*
  572. * '/' processes are higher priority.
  573. */
  574. if(chan->dev->dc == L'/')
  575. up->basepri = PriRoot;
  576. up->priority = up->basepri;
  577. poperror(); /* chan, elem, file */
  578. cclose(chan);
  579. free(file);
  580. /*
  581. * At this point, the mmu contains info about the old address
  582. * space and needs to be flushed
  583. */
  584. mmuflush();
  585. if(up->prepagemem || flags == EXAC)
  586. nixprepage(-1);
  587. qlock(&up->debug);
  588. up->nnote = 0;
  589. up->notify = 0;
  590. up->notified = 0;
  591. up->privatemem = 0;
  592. sysprocsetup(up);
  593. qunlock(&up->debug);
  594. if(up->hang)
  595. up->procctl = Proc_stopme;
  596. ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), argc);
  597. if(flags == EXAC){
  598. up->procctl = Proc_toac;
  599. up->prepagemem = 1;
  600. }
  601. DBG("execac up %#p done\n"
  602. "textsz %lx datasz %lx bsssz %lx hdrsz %lx\n"
  603. "textlim %ullx datalim %ullx bsslim %ullx\n", up,
  604. textsz, datasz, bsssz, hdrsz, textlim, datalim, bsslim);
  605. }
  606. void
  607. sysexecac(Ar0* ar0, va_list list)
  608. {
  609. int flags;
  610. char *file, **argv;
  611. /*
  612. * void* execac(int flags, char* name, char* argv[]);
  613. */
  614. flags = va_arg(list, unsigned int);
  615. file = va_arg(list, char*);
  616. file = validaddr(file, 1, 0);
  617. argv = va_arg(list, char**);
  618. evenaddr(PTR2UINT(argv));
  619. execac(ar0, flags, file, argv);
  620. }
  621. void
  622. sysexec(Ar0* ar0, va_list list)
  623. {
  624. char *file, **argv;
  625. /*
  626. * void* exec(char* name, char* argv[]);
  627. */
  628. file = va_arg(list, char*);
  629. file = validaddr(file, 1, 0);
  630. argv = va_arg(list, char**);
  631. evenaddr(PTR2UINT(argv));
  632. execac(ar0, EXTC, file, argv);
  633. }
  634. void
  635. sysr1(Ar0* , va_list )
  636. {
  637. print("sysr1() called. recompile your binary\n");
  638. }
  639. void
  640. sysnixsyscall(Ar0* , va_list )
  641. {
  642. print("nixsyscall() called. recompile your binary\n");
  643. }
  /*
   * return0 - condition function that is never true.
   * syssleep passes it to tsleep(); the argument is ignored and the
   * result is always 0.
   */
  int
  return0(void*)
  {
  	return 0;
  }
  649. void
  650. syssleep(Ar0* ar0, va_list list)
  651. {
  652. int32_t ms;
  653. /*
  654. * int sleep(long millisecs);
  655. */
  656. ms = va_arg(list, int32_t);
  657. ar0->i = 0;
  658. if(ms <= 0) {
  659. if (up->edf && (up->edf->flags & Admitted))
  660. edfyield();
  661. else
  662. yield();
  663. return;
  664. }
  665. if(ms < TK2MS(1))
  666. ms = TK2MS(1);
  667. tsleep(&up->sleep, return0, 0, ms);
  668. }
  669. void
  670. sysalarm(Ar0* ar0, va_list list)
  671. {
  672. unsigned long ms;
  673. /*
  674. * long alarm(unsigned long millisecs);
  675. * Odd argument type...
  676. */
  677. ms = va_arg(list, unsigned long);
  678. ar0->l = procalarm(ms);
  679. }
  680. void
  681. sysexits(Ar0*, va_list list)
  682. {
  683. char *status;
  684. char *inval = "invalid exit string";
  685. char buf[ERRMAX];
  686. /*
  687. * void exits(char *msg);
  688. */
  689. status = va_arg(list, char*);
  690. if(status){
  691. if(waserror())
  692. status = inval;
  693. else{
  694. status = validaddr(status, 1, 0);
  695. if(vmemchr(status, 0, ERRMAX) == 0){
  696. memmove(buf, status, ERRMAX);
  697. buf[ERRMAX-1] = 0;
  698. status = buf;
  699. }
  700. poperror();
  701. }
  702. }
  703. pexit(status, 1);
  704. }
  705. void
  706. sys_wait(Ar0* ar0, va_list list)
  707. {
  708. int pid;
  709. Waitmsg w;
  710. OWaitmsg *ow;
  711. /*
  712. * int wait(Waitmsg* w);
  713. *
  714. * Deprecated; backwards compatibility only.
  715. */
  716. ow = va_arg(list, OWaitmsg*);
  717. if(ow == nil){
  718. ar0->i = pwait(nil);
  719. return;
  720. }
  721. ow = validaddr(ow, sizeof(OWaitmsg), 1);
  722. evenaddr(PTR2UINT(ow));
  723. pid = pwait(&w);
  724. if(pid >= 0){
  725. readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  726. readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  727. readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  728. readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  729. strncpy(ow->msg, w.msg, sizeof(ow->msg));
  730. ow->msg[sizeof(ow->msg)-1] = '\0';
  731. }
  732. ar0->i = pid;
  733. }
  734. void
  735. sysawait(Ar0* ar0, va_list list)
  736. {
  737. int i;
  738. int pid;
  739. Waitmsg w;
  740. usize n;
  741. char *p;
  742. /*
  743. * int await(char* s, int n);
  744. * should really be
  745. * usize await(char* s, usize n);
  746. */
  747. p = va_arg(list, char*);
  748. n = va_arg(list, int32_t);
  749. p = validaddr(p, n, 1);
  750. pid = pwait(&w);
  751. if(pid < 0){
  752. ar0->i = -1;
  753. return;
  754. }
  755. i = snprint(p, n, "%d %lud %lud %lud %q",
  756. w.pid,
  757. w.time[TUser], w.time[TSys], w.time[TReal],
  758. w.msg);
  759. ar0->i = i;
  760. }
  761. void
  762. werrstr(char *fmt, ...)
  763. {
  764. va_list va;
  765. if(up == nil)
  766. return;
  767. va_start(va, fmt);
  768. vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  769. va_end(va);
  770. }
  771. static void
  772. generrstr(char *buf, int32_t n)
  773. {
  774. char *p, tmp[ERRMAX];
  775. if(n <= 0)
  776. error(Ebadarg);
  777. p = validaddr(buf, n, 1);
  778. if(n > sizeof tmp)
  779. n = sizeof tmp;
  780. memmove(tmp, p, n);
  781. /* make sure it's NUL-terminated */
  782. tmp[n-1] = '\0';
  783. memmove(p, up->syserrstr, n);
  784. p[n-1] = '\0';
  785. memmove(up->syserrstr, tmp, n);
  786. }
  787. void
  788. syserrstr(Ar0* ar0, va_list list)
  789. {
  790. char *err;
  791. usize nerr;
  792. /*
  793. * int errstr(char* err, uint nerr);
  794. * should really be
  795. * usize errstr(char* err, usize nerr);
  796. * but errstr always returns 0.
  797. */
  798. err = va_arg(list, char*);
  799. nerr = va_arg(list, usize);
  800. generrstr(err, nerr);
  801. ar0->i = 0;
  802. }
  803. void
  804. sys_errstr(Ar0* ar0, va_list list)
  805. {
  806. char *p;
  807. /*
  808. * int errstr(char* err);
  809. *
  810. * Deprecated; backwards compatibility only.
  811. */
  812. p = va_arg(list, char*);
  813. generrstr(p, 64);
  814. ar0->i = 0;
  815. }
  816. void
  817. sysnotify(Ar0* ar0, va_list list)
  818. {
  819. void (*f)(void*, char*);
  820. /*
  821. * int notify(void (*f)(void*, char*));
  822. */
  823. f = (void (*)(void*, char*))va_arg(list, void*);
  824. if(f != nil)
  825. validaddr(f, sizeof(void (*)(void*, char*)), 0);
  826. up->notify = f;
  827. ar0->i = 0;
  828. }
  829. void
  830. sysnoted(Ar0* ar0, va_list list)
  831. {
  832. int v;
  833. /*
  834. * int noted(int v);
  835. */
  836. v = va_arg(list, int);
  837. if(v != NRSTR && !up->notified)
  838. error(Egreg);
  839. ar0->i = 0;
  840. }
  841. void
  842. sysrendezvous(Ar0* ar0, va_list list)
  843. {
  844. Proc *p, **l;
  845. uintptr tag, val;
  846. /*
  847. * void* rendezvous(void*, void*);
  848. */
  849. tag = PTR2UINT(va_arg(list, void*));
  850. l = &REND(up->rgrp, tag);
  851. up->rendval = ~0;
  852. lock(up->rgrp);
  853. for(p = *l; p; p = p->rendhash) {
  854. if(p->rendtag == tag) {
  855. *l = p->rendhash;
  856. val = p->rendval;
  857. p->rendval = PTR2UINT(va_arg(list, void*));
  858. while(p->mach != 0)
  859. ;
  860. ready(p);
  861. unlock(up->rgrp);
  862. ar0->v = UINT2PTR(val);
  863. return;
  864. }
  865. l = &p->rendhash;
  866. }
  867. /* Going to sleep here */
  868. up->rendtag = tag;
  869. up->rendval = PTR2UINT(va_arg(list, void*));
  870. up->rendhash = *l;
  871. *l = up;
  872. up->state = Rendezvous;
  873. if(up->trace)
  874. proctrace(up, SLock, 0);
  875. unlock(up->rgrp);
  876. sched();
  877. ar0->v = UINT2PTR(up->rendval);
  878. }
  879. /*
  880. * The implementation of semaphores is complicated by needing
  881. * to avoid rescheduling in syssemrelease, so that it is safe
  882. * to call from real-time processes. This means syssemrelease
  883. * cannot acquire any qlocks, only spin locks.
  884. *
  885. * Semacquire and semrelease must both manipulate the semaphore
  886. * wait list. Lock-free linked lists only exist in theory, not
  887. * in practice, so the wait list is protected by a spin lock.
  888. *
  889. * The semaphore value *addr is stored in user memory, so it
  890. * cannot be read or written while holding spin locks.
  891. *
  892. * Thus, we can access the list only when holding the lock, and
  893. * we can access the semaphore only when not holding the lock.
  894. * This makes things interesting. Note that sleep's condition function
  895. * is called while holding two locks - r and up->rlock - so it cannot
  896. * access the semaphore value either.
  897. *
  898. * An acquirer announces its intention to try for the semaphore
  899. * by putting a Sema structure onto the wait list and then
  900. * setting Sema.waiting. After one last check of semaphore,
  901. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  902. * must wake up n acquirers who have Sema.waiting set. It does
  903. * this by clearing Sema.waiting and then calling wakeup.
  904. *
  905. * There are three interesting races here.
  906. * The first is that in this particular sleep/wakeup usage, a single
  907. * wakeup can rouse a process from two consecutive sleeps!
  908. * The ordering is:
  909. *
  910. * (a) set Sema.waiting = 1
  911. * (a) call sleep
  912. * (b) set Sema.waiting = 0
  913. * (a) check Sema.waiting inside sleep, return w/o sleeping
  914. * (a) try for semaphore, fail
  915. * (a) set Sema.waiting = 1
  916. * (a) call sleep
  917. * (b) call wakeup(a)
  918. * (a) wake up again
  919. *
  920. * This is okay - semacquire will just go around the loop
  921. * again. It does mean that at the top of the for(;;) loop in
  922. * semacquire, phore.waiting might already be set to 1.
  923. *
  924. * The second is that a releaser might wake an acquirer who is
  925. * interrupted before he can acquire the lock. Since
  926. * release(n) issues only n wakeup calls -- only n can be used
  927. * anyway -- if the interrupted process is not going to use his
  928. * wakeup call he must pass it on to another acquirer.
  929. *
  930. * The third race is similar to the second but more subtle. An
  931. * acquirer sets waiting=1 and then does a final canacquire()
  932. * before going to sleep. The opposite order would result in
  933. * missing wakeups that happen between canacquire and
  934. * waiting=1. (In fact, the whole point of Sema.waiting is to
  935. * avoid missing wakeups between canacquire() and sleep().) But
  936. * there can be spurious wakeups between a successful
  937. * canacquire() and the following semdequeue(). This wakeup is
  938. * not useful to the acquirer, since he has already acquired
  939. * the semaphore. Like in the previous case, though, the
  940. * acquirer must pass the wakeup call along.
  941. *
  942. * This is all rather subtle. The code below has been verified
  943. * with the spin model /sys/src/9/port/semaphore.p. The
  944. * original code anticipated the second race but not the first
  945. * or third, which were caught only with spin. The first race
  946. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  947. * It was lucky that my abstract model of sleep/wakeup still managed
  948. * to preserve that behavior.
  949. *
  950. * I remain slightly concerned about memory coherence
  951. * outside of locks. The spin model does not take
  952. * queued processor writes into account so we have to
  953. * think hard. The only variables accessed outside locks
  954. * are the semaphore value itself and the boolean flag
  955. * Sema.waiting. The value is only accessed with CAS,
  956. * whose job description includes doing the right thing as far as
  957. * memory coherence across processors is concerned. That leaves
  958. * Sema.waiting. To handle it, we call coherence() before each
  959. * read and after each write. - rsc
  960. */
  961. /* Add semaphore p with addr a to list in seg. */
  962. static void
  963. semqueue(Segment* s, int* addr, Sema* p)
  964. {
  965. memset(p, 0, sizeof *p);
  966. p->addr = addr;
  967. lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
  968. p->next = &s->sema;
  969. p->prev = s->sema.prev;
  970. p->next->prev = p;
  971. p->prev->next = p;
  972. unlock(&s->sema);
  973. }
  974. /* Remove semaphore p from list in seg. */
  975. static void
  976. semdequeue(Segment* s, Sema* p)
  977. {
  978. lock(&s->sema);
  979. p->next->prev = p->prev;
  980. p->prev->next = p->next;
  981. unlock(&s->sema);
  982. }
  983. /* Wake up n waiters with addr on list in seg. */
  984. static void
  985. semwakeup(Segment* s, int* addr, int n)
  986. {
  987. Sema *p;
  988. lock(&s->sema);
  989. for(p = s->sema.next; p != &s->sema && n > 0; p = p->next){
  990. if(p->addr == addr && p->waiting){
  991. p->waiting = 0;
  992. coherence();
  993. wakeup(p);
  994. n--;
  995. }
  996. }
  997. unlock(&s->sema);
  998. }
  999. /* Add delta to semaphore and wake up waiters as appropriate. */
  1000. static int
  1001. semrelease(Segment* s, int* addr, int delta)
  1002. {
  1003. int value;
  1004. do
  1005. value = *addr;
  1006. while(!CASW(addr, value, value+delta));
  1007. semwakeup(s, addr, delta);
  1008. return value+delta;
  1009. }
  1010. /* Try to acquire semaphore using compare-and-swap */
  1011. static int
  1012. canacquire(int* addr)
  1013. {
  1014. int value;
  1015. while((value = *addr) > 0){
  1016. if(CASW(addr, value, value-1))
  1017. return 1;
  1018. }
  1019. return 0;
  1020. }
  1021. /* Should we wake up? */
  1022. static int
  1023. semawoke(void* p)
  1024. {
  1025. coherence();
  1026. return !((Sema*)p)->waiting;
  1027. }
  1028. /* Acquire semaphore (subtract 1). */
  1029. static int
  1030. semacquire(Segment* s, int* addr, int block)
  1031. {
  1032. int acquired;
  1033. Sema phore;
  1034. if(canacquire(addr))
  1035. return 1;
  1036. if(!block)
  1037. return 0;
  1038. acquired = 0;
  1039. semqueue(s, addr, &phore);
  1040. for(;;){
  1041. phore.waiting = 1;
  1042. coherence();
  1043. if(canacquire(addr)){
  1044. acquired = 1;
  1045. break;
  1046. }
  1047. if(waserror())
  1048. break;
  1049. sleep(&phore, semawoke, &phore);
  1050. poperror();
  1051. }
  1052. semdequeue(s, &phore);
  1053. coherence(); /* not strictly necessary due to lock in semdequeue */
  1054. if(!phore.waiting)
  1055. semwakeup(s, addr, 1);
  1056. if(!acquired)
  1057. nexterror();
  1058. return 1;
  1059. }
  1060. /* Acquire semaphore or time-out */
  1061. static int
  1062. tsemacquire(Segment* s, int* addr, int32_t ms)
  1063. {
  1064. int acquired;
  1065. uint32_t t;
  1066. Sema phore;
  1067. if(canacquire(addr))
  1068. return 1;
  1069. if(ms == 0)
  1070. return 0;
  1071. acquired = 0;
  1072. semqueue(s, addr, &phore);
  1073. for(;;){
  1074. phore.waiting = 1;
  1075. coherence();
  1076. if(canacquire(addr)){
  1077. acquired = 1;
  1078. break;
  1079. }
  1080. if(waserror())
  1081. break;
  1082. t = sys->ticks;
  1083. tsleep(&phore, semawoke, &phore, ms);
  1084. ms -= TK2MS(sys->ticks-t);
  1085. poperror();
  1086. if(ms <= 0)
  1087. break;
  1088. }
  1089. semdequeue(s, &phore);
  1090. coherence(); /* not strictly necessary due to lock in semdequeue */
  1091. if(!phore.waiting)
  1092. semwakeup(s, addr, 1);
  1093. if(ms <= 0)
  1094. return 0;
  1095. if(!acquired)
  1096. nexterror();
  1097. return 1;
  1098. }
  1099. void
  1100. syssemacquire(Ar0* ar0, va_list list)
  1101. {
  1102. Segment *s;
  1103. int *addr, block;
  1104. /*
  1105. * int semacquire(long* addr, int block);
  1106. * should be (and will be implemented below as) perhaps
  1107. * int semacquire(int* addr, int block);
  1108. */
  1109. addr = va_arg(list, int*);
  1110. addr = validaddr(addr, sizeof(int), 1);
  1111. evenaddr(PTR2UINT(addr));
  1112. block = va_arg(list, int);
  1113. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1114. error(Ebadarg);
  1115. if(*addr < 0)
  1116. error(Ebadarg);
  1117. ar0->i = semacquire(s, addr, block);
  1118. }
  1119. void
  1120. systsemacquire(Ar0* ar0, va_list list)
  1121. {
  1122. Segment *s;
  1123. int *addr, ms;
  1124. /*
  1125. * int tsemacquire(long* addr, ulong ms);
  1126. * should be (and will be implemented below as) perhaps
  1127. * int tsemacquire(int* addr, ulong ms);
  1128. */
  1129. addr = va_arg(list, int*);
  1130. addr = validaddr(addr, sizeof(int), 1);
  1131. evenaddr(PTR2UINT(addr));
  1132. ms = va_arg(list, uint32_t);
  1133. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1134. error(Ebadarg);
  1135. if(*addr < 0)
  1136. error(Ebadarg);
  1137. ar0->i = tsemacquire(s, addr, ms);
  1138. }
  1139. void
  1140. syssemrelease(Ar0* ar0, va_list list)
  1141. {
  1142. Segment *s;
  1143. int *addr, delta;
  1144. /*
  1145. * long semrelease(long* addr, long count);
  1146. * should be (and will be implemented below as) perhaps
  1147. * int semrelease(int* addr, int count);
  1148. */
  1149. addr = va_arg(list, int*);
  1150. addr = validaddr(addr, sizeof(int), 1);
  1151. evenaddr(PTR2UINT(addr));
  1152. delta = va_arg(list, int);
  1153. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1154. error(Ebadarg);
  1155. if(delta < 0 || *addr < 0)
  1156. error(Ebadarg);
  1157. ar0->i = semrelease(s, addr, delta);
  1158. }