/* sysproc.c */
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "tos.h"
  11. #include "../port/lib.h"
  12. #include "mem.h"
  13. #include "dat.h"
  14. #include "fns.h"
  15. #include "../port/error.h"
  16. #include "../port/edf.h"
  17. #include <trace.h>
  /*
   * Debug output is compiled out in this file: DBG(...) expands to
   * `if(0) print(...)`, so the arguments are still parsed but the
   * call is never made.
   */
  #undef DBG
  #define DBG if(0) print
  22. void
  23. sysrfork(Ar0 *ar0, ...)
  24. {
  25. Proc *up = externup();
  26. Proc *p;
  27. int flag, i, n, pid;
  28. Fgrp *ofg;
  29. Pgrp *opg;
  30. Rgrp *org;
  31. Egrp *oeg;
  32. Mach *wm;
  33. va_list list;
  34. va_start(list, ar0);
  35. /*
  36. * int rfork(int);
  37. */
  38. flag = va_arg(list, int);
  39. va_end(list);
  40. /* Check flags before we commit */
  41. if((flag & (RFFDG | RFCFDG)) == (RFFDG | RFCFDG))
  42. error(Ebadarg);
  43. if((flag & (RFNAMEG | RFCNAMEG)) == (RFNAMEG | RFCNAMEG))
  44. error(Ebadarg);
  45. if((flag & (RFENVG | RFCENVG)) == (RFENVG | RFCENVG))
  46. error(Ebadarg);
  47. if((flag & (RFPREPAGE | RFCPREPAGE)) == (RFPREPAGE | RFCPREPAGE))
  48. error(Ebadarg);
  49. if((flag & (RFCORE | RFCCORE)) == (RFCORE | RFCCORE))
  50. error(Ebadarg);
  51. if(flag & RFCORE && up->wired != nil)
  52. error("wired proc cannot move to ac");
  53. if((flag & RFPROC) == 0){
  54. if(flag & (RFMEM | RFNOWAIT))
  55. error(Ebadarg);
  56. if(flag & (RFFDG | RFCFDG)){
  57. ofg = up->fgrp;
  58. if(flag & RFFDG)
  59. up->fgrp = dupfgrp(ofg);
  60. else
  61. up->fgrp = dupfgrp(nil);
  62. closefgrp(ofg);
  63. }
  64. if(flag & (RFNAMEG | RFCNAMEG)){
  65. opg = up->pgrp;
  66. up->pgrp = newpgrp();
  67. if(flag & RFNAMEG)
  68. pgrpcpy(up->pgrp, opg);
  69. /* inherit noattach */
  70. up->pgrp->noattach = opg->noattach;
  71. closepgrp(opg);
  72. }
  73. if(flag & RFNOMNT)
  74. up->pgrp->noattach = 1;
  75. if(flag & RFREND){
  76. org = up->rgrp;
  77. up->rgrp = newrgrp();
  78. closergrp(org);
  79. }
  80. if(flag & (RFENVG | RFCENVG)){
  81. oeg = up->egrp;
  82. up->egrp = smalloc(sizeof(Egrp));
  83. up->egrp->r.ref = 1;
  84. if(flag & RFENVG)
  85. envcpy(up->egrp, oeg);
  86. closeegrp(oeg);
  87. }
  88. if(flag & RFNOTEG)
  89. up->noteid = incref(&noteidalloc);
  90. if(flag & (RFPREPAGE | RFCPREPAGE)){
  91. up->prepagemem = flag & RFPREPAGE;
  92. nixprepage(-1);
  93. }
  94. if(flag & RFCORE){
  95. up->ac = getac(up, -1);
  96. up->procctl = Proc_toac;
  97. } else if(flag & RFCCORE){
  98. if(up->ac != nil)
  99. up->procctl = Proc_totc;
  100. }
  101. ar0->i = 0;
  102. return;
  103. }
  104. p = newproc();
  105. if(flag & RFCORE){
  106. if(!waserror()){
  107. p->ac = getac(p, -1);
  108. p->procctl = Proc_toac;
  109. poperror();
  110. } else {
  111. print("warning: rfork: no available ac for the child, it runs in the tc\n");
  112. p->procctl = 0;
  113. }
  114. }
  115. if(up->trace)
  116. p->trace = 1;
  117. p->scallnr = up->scallnr;
  118. memmove(p->arg, up->arg, sizeof(up->arg));
  119. p->nerrlab = 0;
  120. p->slash = up->slash;
  121. p->dot = up->dot;
  122. incref(&p->dot->r);
  123. memmove(p->note, up->note, sizeof(p->note));
  124. p->privatemem = up->privatemem;
  125. p->noswap = up->noswap;
  126. p->nnote = up->nnote;
  127. p->plan9 = up->plan9;
  128. p->notified = 0;
  129. p->lastnote = up->lastnote;
  130. p->notify = up->notify;
  131. p->ureg = up->ureg;
  132. p->prepagemem = up->prepagemem;
  133. p->dbgreg = 0;
  134. /* Make a new set of memory segments */
  135. n = flag & RFMEM;
  136. qlock(&p->seglock);
  137. if(waserror()){
  138. qunlock(&p->seglock);
  139. nexterror();
  140. }
  141. for(i = 0; i < NSEG; i++)
  142. if(up->seg[i])
  143. p->seg[i] = dupseg(up->seg, i, n);
  144. qunlock(&p->seglock);
  145. poperror();
  146. /* File descriptors */
  147. if(flag & (RFFDG | RFCFDG)){
  148. if(flag & RFFDG)
  149. p->fgrp = dupfgrp(up->fgrp);
  150. else
  151. p->fgrp = dupfgrp(nil);
  152. } else {
  153. p->fgrp = up->fgrp;
  154. incref(&p->fgrp->r);
  155. }
  156. /* Process groups */
  157. if(flag & (RFNAMEG | RFCNAMEG)){
  158. p->pgrp = newpgrp();
  159. if(flag & RFNAMEG)
  160. pgrpcpy(p->pgrp, up->pgrp);
  161. /* inherit noattach */
  162. p->pgrp->noattach = up->pgrp->noattach;
  163. } else {
  164. p->pgrp = up->pgrp;
  165. incref(&p->pgrp->r);
  166. }
  167. if(flag & RFNOMNT)
  168. up->pgrp->noattach = 1;
  169. if(flag & RFREND)
  170. p->rgrp = newrgrp();
  171. else {
  172. incref(&up->rgrp->r);
  173. p->rgrp = up->rgrp;
  174. }
  175. /* Environment group */
  176. if(flag & (RFENVG | RFCENVG)){
  177. p->egrp = smalloc(sizeof(Egrp));
  178. p->egrp->r.ref = 1;
  179. if(flag & RFENVG)
  180. envcpy(p->egrp, up->egrp);
  181. } else {
  182. p->egrp = up->egrp;
  183. incref(&p->egrp->r);
  184. }
  185. p->hang = up->hang;
  186. p->procmode = up->procmode;
  187. /* Craft a return frame which will cause the child to pop out of
  188. * the scheduler in user mode with the return register zero
  189. */
  190. sysrforkchild(p, up);
  191. p->parent = up;
  192. p->parentpid = up->pid;
  193. if(flag & RFNOWAIT)
  194. p->parentpid = 0;
  195. else {
  196. lock(&up->exl);
  197. up->nchild++;
  198. unlock(&up->exl);
  199. }
  200. if((flag & RFNOTEG) == 0)
  201. p->noteid = up->noteid;
  202. pid = p->pid;
  203. memset(p->time, 0, sizeof(p->time));
  204. p->time[TReal] = sys->ticks;
  205. if(flag & (RFPREPAGE | RFCPREPAGE)){
  206. p->prepagemem = flag & RFPREPAGE;
  207. /*
  208. * BUG: this is prepaging our memory, not
  209. * that of the child, but at least we
  210. * will do the copy on write.
  211. */
  212. nixprepage(-1);
  213. }
  214. kstrdup(&p->text, up->text);
  215. kstrdup(&p->user, up->user);
  216. /*
  217. * since the bss/data segments are now shareable,
  218. * any mmu info about this process is now stale
  219. * (i.e. has bad properties) and has to be discarded.
  220. */
  221. mmuflush();
  222. p->basepri = up->basepri;
  223. p->priority = up->basepri;
  224. p->fixedpri = up->fixedpri;
  225. p->mp = up->mp;
  226. wm = up->wired;
  227. if(wm)
  228. procwired(p, wm->machno);
  229. p->color = up->color;
  230. ready(p);
  231. sched();
  232. ar0->i = pid;
  233. }
  #if 0
  /*
   * Assemble the bytes of v, in memory order, into a big-endian
   * 64-bit value. Each byte is widened to u64 before shifting so that
   * a byte >= 0x80 cannot reach the sign bit of an int and sign-extend
   * into the upper half of the result.
   */
  static u64
  vl2be(u64 v)
  {
      u8 *p;

      p = (u8 *)&v;
      return ((u64)p[0] << 56) | ((u64)p[1] << 48) | ((u64)p[2] << 40) | ((u64)p[3] << 32)
             | ((u64)p[4] << 24) | ((u64)p[5] << 16) | ((u64)p[6] << 8) | (u64)p[7];
  }

  /*
   * Assemble the bytes of l, in memory order, into a big-endian
   * 32-bit value. Bytes are widened to u32 before shifting to avoid
   * shifting into the sign bit of an int.
   */
  static u32
  l2be(i32 l)
  {
      u8 *cp;

      cp = (u8 *)&l;
      return ((u32)cp[0] << 24) | ((u32)cp[1] << 16) | ((u32)cp[2] << 8) | (u32)cp[3];
  }
  #endif
  251. /*
  252. * flags can ONLY specify that you want an AC for you, or
  253. * that you want an XC for you.
  254. */
  255. static void
  256. execac(Ar0 *ar0, int flags, char *ufile, char **argv)
  257. {
  258. Proc *up = externup();
  259. Fgrp *fg;
  260. Tos *tos;
  261. Chan *chan, *ichan;
  262. Image *img;
  263. Segment *s;
  264. Ldseg *ldseg;
  265. int argc, i, n, nldseg;
  266. char *a, *elem, *file, *p;
  267. // This line array is an accident waiting to happen but ...
  268. char line[64], aoutheader[64], *progarg[sizeof(line) / 2 + 1];
  269. i32 hdrsz;
  270. usize entry, stack;
  271. int plan9 = 0;
  272. file = nil;
  273. elem = nil;
  274. switch(flags){
  275. case EXTC:
  276. case EXXC:
  277. break;
  278. case EXAC:
  279. up->ac = getac(up, -1);
  280. break;
  281. default:
  282. error("unknown execac flag");
  283. }
  284. if(waserror()){
  285. DBG("execac: failing: %s\n", up->errstr);
  286. free(file);
  287. free(elem);
  288. if(flags == EXAC && up->ac != nil)
  289. up->ac->proc = nil;
  290. up->ac = nil;
  291. nexterror();
  292. }
  293. /*
  294. * Open the file, remembering the final element and the full name.
  295. */
  296. argc = 0;
  297. file = validnamedup(ufile, 1);
  298. DBG("execac: up %#p file %s\n", up, file);
  299. if(up->trace)
  300. proctracepid(up);
  301. ichan = namec(file, Aopen, OEXEC, 0);
  302. if(waserror()){
  303. iprint("ERROR ON OPEN\n");
  304. cclose(ichan);
  305. nexterror();
  306. }
  307. kstrdup(&elem, up->genbuf);
  308. // TODO: we really messed this up at some point. This needs to be a loop.
  309. // See 9legacy to get some idea of what it has to look like.
  310. /*
  311. * Read the header.
  312. * If it's a #!, fill in progarg[] with info then read a new header
  313. * from the file indicated by the #!.
  314. * The #! line must be less than sizeof(Exec) in size,
  315. * including the terminating \n.
  316. */
  317. hdrsz = ichan->dev->read(ichan, line, sizeof line, 0);
  318. if(hdrsz < 2)
  319. error(Ebadexec);
  320. if(line[0] == '#' && line[1] == '!'){
  321. p = memchr(line, '\n', MIN(sizeof line, hdrsz));
  322. if(p == nil)
  323. error(Ebadexec);
  324. *p = '\0';
  325. // N.B.: does not copy line array, just sets pointers into it.
  326. argc = tokenize(line + 2, progarg, nelem(progarg));
  327. if(argc == 0)
  328. error(Ebadexec);
  329. /* The original file becomes an extra arg after #! line */
  330. progarg[argc++] = file;
  331. /*
  332. * Take the #! $0 as a file to open, and replace
  333. * $0 with the original path's name.
  334. */
  335. p = progarg[0];
  336. progarg[0] = elem;
  337. chan = nil; /* in case namec errors out */
  338. USED(chan);
  339. chan = namec(p, Aopen, OEXEC, 0);
  340. } else {
  341. chan = ichan;
  342. incref(&ichan->r);
  343. }
  344. /* chan is the chan to use, initial or not. ichan is irrelevant now */
  345. cclose(ichan);
  346. poperror();
  347. /*
  348. * #! has had its chance, now we need a real binary.
  349. */
  350. // line has the command and arguments as text.
  351. // OR it has a file header of a binary.
  352. // Just reread it. This entire function needs a redo,
  353. // but for now, let's just try to make it work correctly.
  354. // aoutldseg does no file i/o for its test.
  355. // so give it first dibs.
  356. // -1 means it's not a.out
  357. // 0 means a.out but something did not end well
  358. // > 0 means it's a good a.out
  359. hdrsz = chan->dev->read(chan, aoutheader, sizeof aoutheader, 0);
  360. if(hdrsz < 2)
  361. error(Ebadexec);
  362. nldseg = aoutldseg(aoutheader, &entry, &ldseg, cputype, BIGPGSZ);
  363. switch(nldseg){
  364. default:
  365. plan9 = 1;
  366. break;
  367. case 0:
  368. print("execac: execaout returned 0 segs!\n");
  369. error(Ebadexec);
  370. case -1:
  371. nldseg = elf64ldseg(chan, &entry, &ldseg, cputype, BIGPGSZ);
  372. if(nldseg == 0){
  373. print("execac: elf64ldseg returned 0 segs!\n");
  374. error(Ebadexec);
  375. }
  376. }
  377. /* TODO(aki): not sure I see the point
  378. if(up->ac != nil && up->ac != machp())
  379. up->color = corecolor(up->ac->machno);
  380. else
  381. up->color = corecolor(machp()->machno);
  382. */
  383. /*
  384. * The new stack is temporarily mapped elsewhere.
  385. * The stack contains, in descending address order:
  386. * a structure containing housekeeping and profiling data (Tos);
  387. * argument strings;
  388. * array of vectors to the argument strings with a terminating
  389. * nil (argv).
  390. * When the exec is committed, this temporary stack is relocated
  391. * to become the actual stack segment.
  392. * The architecture-dependent code which jumps to the new image
  393. * will also push a count of the argument array onto the stack (argc).
  394. */
  395. qlock(&up->seglock);
  396. int sno = -1;
  397. if(waserror()){
  398. if(sno != -1 && up->seg[sno] != nil){
  399. putseg(up->seg[sno]);
  400. up->seg[sno] = nil;
  401. }
  402. qunlock(&up->seglock);
  403. nexterror();
  404. }
  405. for(i = 0; i < NSEG; i++)
  406. if(up->seg[i] == nil)
  407. break;
  408. if(i == NSEG)
  409. error("exeac: no free segment slots");
  410. sno = i;
  411. up->seg[sno] = newseg(SG_STACK | SG_READ | SG_WRITE, TSTKTOP - USTKSIZE, USTKSIZE / BIGPGSZ);
  412. up->seg[sno]->color = up->color;
  413. /*
  414. * Stack is a pointer into the temporary stack
  415. * segment, and will move as items are pushed.
  416. */
  417. stack = TSTKTOP - sizeof(Tos);
  418. /*
  419. * First, the top-of-stack structure.
  420. */
  421. tos = (Tos *)(USTKTOP - sizeof(Tos));
  422. tos->cyclefreq = sys->cyclefreq;
  423. cycles((u64 *)&tos->pcycles);
  424. tos->pcycles = -tos->pcycles;
  425. tos->kcycles = tos->pcycles;
  426. tos->clock = 0;
  427. /*
  428. * Next push any arguments found from a #! header.
  429. */
  430. for(i = 0; i < argc; i++){
  431. n = strlen(progarg[i]) + 1;
  432. stack -= n;
  433. memmove(UINT2PTR(stack), progarg[i], n);
  434. }
  435. /*
  436. * Copy the strings pointed to by the syscall argument argv into
  437. * the temporary stack segment, being careful to check
  438. * the strings argv points to are valid.
  439. */
  440. for(i = 0;; i++, argv++){
  441. a = *(char **)validaddr(argv, sizeof(char **), 0);
  442. if(a == nil)
  443. break;
  444. a = validaddr(a, 1, 0);
  445. n = ((char *)vmemchr(a, 0, 0x7fffffff) - a) + 1;
  446. /*
  447. * This futzing is so argv[0] gets validated even
  448. * though it will be thrown away if this is a shell
  449. * script.
  450. */
  451. if(argc > 0 && i == 0)
  452. continue;
  453. /*
  454. * Before copying the string into the temporary stack,
  455. * which might involve a demand-page, check the string
  456. * will not overflow the bottom of the stack.
  457. */
  458. stack -= n;
  459. if(stack < TSTKTOP - USTKSIZE)
  460. error(Enovmem);
  461. p = UINT2PTR(stack);
  462. memmove(p, a, n);
  463. p[n - 1] = 0;
  464. argc++;
  465. }
  466. if(argc < 1)
  467. error(Ebadexec);
  468. /*
  469. * Before pushing the argument pointers onto the temporary stack,
  470. * which might involve a demand-page, check there is room for the
  471. * terminating nil pointer, plus pointers, plus some slop for however
  472. * argc might be passed on the stack by sysexecregs (give a page
  473. * of slop, it is an overestimate, but why not).
  474. * Sysexecstack does any architecture-dependent stack alignment.
  475. * Keep a copy of the start of the argument strings before alignment
  476. * so up->args can be created later.
  477. * Although the argument vectors are being pushed onto the stack in
  478. * the temporary segment, the values must be adjusted to reflect
  479. * the segment address after it replaces the current SSEG.
  480. */
  481. a = p = UINT2PTR(stack);
  482. stack = sysexecstack(stack, argc);
  483. if(stack - (argc + 2) * sizeof(char **) - BIGPGSZ < TSTKTOP - USTKSIZE){
  484. //iprint("stck too small?\n");
  485. error(Ebadexec);
  486. }
  487. argv = (char **)stack;
  488. *--argv = nil;
  489. for(i = 0; i < argc; i++){
  490. *--argv = p + (USTKTOP - TSTKTOP);
  491. p += strlen(p) + 1;
  492. }
  493. *--argv = (void *)(usize)argc;
  494. /*
  495. * Make a good faith copy of the args in up->args using the strings
  496. * in the temporary stack segment. The length must be > 0 as it
  497. * includes the \0 on the last argument and argc was checked earlier
  498. * to be > 0. After the memmove, compensate for any UTF character
  499. * boundary before placing the terminating \0.
  500. */
  501. n = p - a;
  502. if(n <= 0)
  503. error(Egreg);
  504. if(n > 128)
  505. n = 128;
  506. p = smalloc(n);
  507. if(waserror()){
  508. free(p);
  509. nexterror();
  510. }
  511. memmove(p, a, n);
  512. while(n > 0 && (p[n - 1] & 0xc0) == 0x80)
  513. n--;
  514. p[n - 1] = '\0';
  515. /*
  516. * All the argument processing is now done, ready to commit.
  517. */
  518. free(up->text);
  519. up->text = elem;
  520. elem = nil;
  521. free(up->args);
  522. up->args = p;
  523. up->nargs = n;
  524. poperror(); /* p (up->args) */
  525. /*
  526. * Close on exec
  527. */
  528. fg = up->fgrp;
  529. for(i = 0; i <= fg->maxfd; i++)
  530. fdclose(i, CCEXEC);
  531. /*
  532. * Free old memory, except for the temp stack (obviously)
  533. */
  534. s = up->seg[sno];
  535. for(i = 0; i < NSEG; i++){
  536. if(up->seg[i] != s)
  537. putseg(up->seg[i]);
  538. up->seg[i] = nil;
  539. }
  540. /* put the stack in first */
  541. sno = 0;
  542. up->seg[sno++] = s;
  543. s->base = USTKTOP - USTKSIZE;
  544. s->top = USTKTOP;
  545. relocateseg(s, USTKTOP - TSTKTOP);
  546. img = nil;
  547. usize datalim;
  548. datalim = 0;
  549. for(i = 0; i < nldseg; i++){
  550. if(img == nil){
  551. img = attachimage(ldseg[i].type, chan, up->color,
  552. ldseg[i].pg0vaddr,
  553. (ldseg[i].pg0off + ldseg[i].memsz + BIGPGSZ - 1) / BIGPGSZ);
  554. s = img->s;
  555. s->flushme = 1;
  556. if(img->color != up->color)
  557. up->color = img->color;
  558. unlock(&img->r.l);
  559. } else {
  560. s = newseg(ldseg[i].type, ldseg[i].pg0vaddr, (ldseg[i].pg0off + ldseg[i].memsz + BIGPGSZ - 1) / BIGPGSZ);
  561. s->color = up->color;
  562. incref(&img->r);
  563. s->image = img;
  564. }
  565. s->ldseg = ldseg[i];
  566. up->seg[sno++] = s;
  567. if(datalim < ldseg[i].pg0vaddr + ldseg[i].memsz)
  568. datalim = ldseg[i].pg0vaddr + ldseg[i].memsz;
  569. }
  570. /* BSS. Zero fill on demand for TS */
  571. s = newseg(SG_BSS | SG_READ | SG_WRITE, (datalim + BIGPGSZ - 1) & ~(BIGPGSZ - 1), 0);
  572. up->seg[sno++] = s;
  573. s->color = up->color;
  574. for(i = 0; i < sno; i++){
  575. s = up->seg[i];
  576. DBG(
  577. "execac %d %s(%c%c%c) %p:%p va %p off %p fsz %d msz %d\n",
  578. up->pid, segtypes[s->type & SG_TYPE],
  579. (s->type & SG_READ) != 0 ? 'r' : '-',
  580. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  581. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  582. s->base, s->top,
  583. s->ldseg.pg0vaddr + s->ldseg.pg0off,
  584. s->ldseg.pg0fileoff + s->ldseg.pg0off,
  585. s->ldseg.filesz,
  586. s->ldseg.memsz);
  587. }
  588. /* the color of the stack was decided when we created it before,
  589. * it may have nothing to do with the color of other segments.
  590. */
  591. qunlock(&up->seglock);
  592. poperror(); /* seglock */
  593. /*
  594. * '/' processes are higher priority
  595. * aki: why bother?
  596. *
  597. * if(chan->dev->dc == L'/')
  598. * up->basepri = PriRoot;
  599. */
  600. up->priority = up->basepri;
  601. poperror(); /* chan, elem, file */
  602. cclose(chan);
  603. free(file);
  604. /*
  605. * At this point, the mmu contains info about the old address
  606. * space and needs to be flushed
  607. */
  608. mmuflush();
  609. if(up->prepagemem || flags == EXAC)
  610. nixprepage(-1);
  611. qlock(&up->debug);
  612. up->nnote = 0;
  613. up->notify = 0;
  614. up->notified = 0;
  615. up->privatemem = 0;
  616. sysprocsetup(up);
  617. qunlock(&up->debug);
  618. if(up->hang)
  619. up->procctl = Proc_stopme;
  620. /* we need to compte the value of &argv in user mode and then push that. */
  621. ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), (void *)(USTKTOP - sizeof(Tos)));
  622. if(flags == EXAC){
  623. up->procctl = Proc_toac;
  624. up->prepagemem = 1;
  625. }
  626. up->plan9 = plan9;
  627. }
  628. void
  629. sysexecac(Ar0 *ar0, ...)
  630. {
  631. int flags;
  632. char *file, **argv;
  633. va_list list;
  634. va_start(list, ar0);
  635. /*
  636. * void* execac(int flags, char* name, char* argv[]);
  637. */
  638. flags = va_arg(list, unsigned int);
  639. file = va_arg(list, char *);
  640. file = validaddr(file, 1, 0);
  641. argv = va_arg(list, char **);
  642. va_end(list);
  643. evenaddr(PTR2UINT(argv));
  644. execac(ar0, flags, file, argv);
  645. }
  646. void
  647. sysexec(Ar0 *ar0, ...)
  648. {
  649. char *file, **argv;
  650. va_list list;
  651. va_start(list, ar0);
  652. /*
  653. * void* exec(char* name, char* argv[]);
  654. */
  655. file = va_arg(list, char *);
  656. file = validaddr(file, 1, 0);
  657. argv = va_arg(list, char **);
  658. va_end(list);
  659. evenaddr(PTR2UINT(argv));
  660. execac(ar0, EXTC, file, argv);
  661. }
  /*
   * Condition function that is never true: ignores its argument and
   * returns 0. Passed to tsleep() by syssleep.
   */
  int
  return0(void *v)
  {
      (void)v;
      return 0;
  }
  667. void
  668. syssleep(Ar0 *ar0, ...)
  669. {
  670. Proc *up = externup();
  671. i64 ms;
  672. va_list list;
  673. va_start(list, ar0);
  674. /*
  675. * int sleep(long millisecs);
  676. */
  677. ms = va_arg(list, i64);
  678. va_end(list);
  679. ar0->i = 0;
  680. if(ms <= 0){
  681. if(up->edf && (up->edf->flags & Admitted))
  682. edfyield();
  683. else
  684. yield();
  685. return;
  686. }
  687. if(ms < TK2MS(1))
  688. ms = TK2MS(1);
  689. tsleep(&up->sleep, return0, 0, ms);
  690. }
  691. void
  692. sysalarm(Ar0 *ar0, ...)
  693. {
  694. u64 ms;
  695. va_list list;
  696. va_start(list, ar0);
  697. /*
  698. * long alarm(unsigned long millisecs);
  699. * Odd argument type...
  700. */
  701. ms = va_arg(list, u64);
  702. va_end(list);
  703. ar0->vl = procalarm(ms);
  704. }
  705. void
  706. sysexits(Ar0 *ar0, ...)
  707. {
  708. Proc *up = externup();
  709. char *status;
  710. char *inval = "invalid exit string";
  711. char buf[ERRMAX];
  712. va_list list;
  713. va_start(list, ar0);
  714. /*
  715. * void exits(char *msg);
  716. */
  717. status = va_arg(list, char *);
  718. va_end(list);
  719. if(status){
  720. if(waserror())
  721. status = inval;
  722. else {
  723. status = validaddr(status, 1, 0);
  724. if(vmemchr(status, 0, ERRMAX) == 0){
  725. memmove(buf, status, ERRMAX);
  726. buf[ERRMAX - 1] = 0;
  727. status = buf;
  728. }
  729. poperror();
  730. }
  731. }
  732. pexit(status, 1);
  733. }
  734. void
  735. sysawait(Ar0 *ar0, ...)
  736. {
  737. int i;
  738. int pid;
  739. Waitmsg w;
  740. usize n;
  741. char *p;
  742. va_list list;
  743. va_start(list, ar0);
  744. /*
  745. * int await(char* s, int n);
  746. * should really be
  747. * usize await(char* s, usize n);
  748. */
  749. p = va_arg(list, char *);
  750. n = va_arg(list, i32);
  751. va_end(list);
  752. p = validaddr(p, n, 1);
  753. pid = pwait(&w);
  754. if(pid < 0){
  755. ar0->i = -1;
  756. return;
  757. }
  758. i = snprint(p, n, "%d %lu %lu %lu %q",
  759. w.pid,
  760. w.time[TUser], w.time[TSys], w.time[TReal],
  761. w.msg);
  762. ar0->i = i;
  763. }
  764. void
  765. werrstr(char *fmt, ...)
  766. {
  767. Proc *up = externup();
  768. va_list va;
  769. if(up == nil)
  770. return;
  771. va_start(va, fmt);
  772. vseprint(up->syserrstr, up->syserrstr + ERRMAX, fmt, va);
  773. va_end(va);
  774. }
  775. static void
  776. generrstr(char *buf, i32 n)
  777. {
  778. Proc *up = externup();
  779. char *p, tmp[ERRMAX];
  780. if(n <= 0)
  781. error(Ebadarg);
  782. p = validaddr(buf, n, 1);
  783. if(n > sizeof tmp)
  784. n = sizeof tmp;
  785. memmove(tmp, p, n);
  786. /* make sure it's NUL-terminated */
  787. tmp[n - 1] = '\0';
  788. memmove(p, up->syserrstr, n);
  789. p[n - 1] = '\0';
  790. memmove(up->syserrstr, tmp, n);
  791. }
  792. void
  793. syserrstr(Ar0 *ar0, ...)
  794. {
  795. char *err;
  796. usize nerr;
  797. va_list list;
  798. va_start(list, ar0);
  799. /*
  800. * int errstr(char* err, u32 nerr);
  801. * should really be
  802. * usize errstr(char* err, usize nerr);
  803. * but errstr always returns 0.
  804. */
  805. err = va_arg(list, char *);
  806. nerr = va_arg(list, usize);
  807. va_end(list);
  808. generrstr(err, nerr);
  809. ar0->i = 0;
  810. }
  811. void
  812. sysnotify(Ar0 *ar0, ...)
  813. {
  814. Proc *up = externup();
  815. void (*f)(void *, char *);
  816. va_list list;
  817. va_start(list, ar0);
  818. /*
  819. * int notify(void (*f)(void*, char*));
  820. */
  821. f = (void (*)(void *, char *))va_arg(list, void *);
  822. va_end(list);
  823. if(f != nil)
  824. validaddr(f, sizeof(void (*)(void *, char *)), 0);
  825. up->notify = f;
  826. ar0->i = 0;
  827. }
  828. void
  829. sysnoted(Ar0 *ar0, ...)
  830. {
  831. Proc *up = externup();
  832. int v;
  833. va_list list;
  834. va_start(list, ar0);
  835. /*
  836. * int noted(int v);
  837. */
  838. v = va_arg(list, int);
  839. va_end(list);
  840. if(v != NRSTR && !up->notified)
  841. error(Egreg);
  842. ar0->i = 0;
  843. }
  844. void
  845. sysr0(Ar0 *ar0, ...)
  846. {
  847. Proc *up = externup();
  848. dumpgpr(up->ureg);
  849. ar0->i = 0;
  850. }
  851. void
  852. sysrendezvous(Ar0 *ar0, ...)
  853. {
  854. Proc *up = externup();
  855. Proc *p, **l;
  856. usize tag, val;
  857. va_list list;
  858. va_start(list, ar0);
  859. /*
  860. * void* rendezvous(void*, void*);
  861. */
  862. tag = PTR2UINT(va_arg(list, void *));
  863. l = &REND(up->rgrp, tag);
  864. up->rendval = ~0;
  865. lock(&up->rgrp->r.l);
  866. for(p = *l; p; p = p->rendhash){
  867. if(p->rendtag == tag){
  868. *l = p->rendhash;
  869. val = p->rendval;
  870. p->rendval = PTR2UINT(va_arg(list, void *));
  871. while(p->mach != 0)
  872. ;
  873. ready(p);
  874. unlock(&up->rgrp->r.l);
  875. ar0->v = UINT2PTR(val);
  876. return;
  877. }
  878. l = &p->rendhash;
  879. }
  880. /* Going to sleep here */
  881. up->rendtag = tag;
  882. up->rendval = PTR2UINT(va_arg(list, void *));
  883. va_end(list);
  884. up->rendhash = *l;
  885. *l = up;
  886. up->state = Rendezvous;
  887. if(up->trace)
  888. proctrace(up, SLock, 0);
  889. unlock(&up->rgrp->r.l);
  890. sched();
  891. ar0->v = UINT2PTR(up->rendval);
  892. }
  893. /*
  894. * The implementation of semaphores is complicated by needing
  895. * to avoid rescheduling in syssemrelease, so that it is safe
  896. * to call from real-time processes. This means syssemrelease
  897. * cannot acquire any qlocks, only spin locks.
  898. *
  899. * Semacquire and semrelease must both manipulate the semaphore
  900. * wait list. Lock-free linked lists only exist in theory, not
  901. * in practice, so the wait list is protected by a spin lock.
  902. *
  903. * The semaphore value *addr is stored in user memory, so it
  904. * cannot be read or written while holding spin locks.
  905. *
  906. * Thus, we can access the list only when holding the lock, and
  907. * we can access the semaphore only when not holding the lock.
  908. * This makes things interesting. Note that sleep's condition function
  909. * is called while holding two locks - r and up->rlock - so it cannot
  910. * access the semaphore value either.
  911. *
  912. * An acquirer announces its intention to try for the semaphore
  913. * by putting a Sema structure onto the wait list and then
  914. * setting Sema.waiting. After one last check of semaphore,
  915. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  916. * must wake up n acquirers who have Sema.waiting set. It does
  917. * this by clearing Sema.waiting and then calling wakeup.
  918. *
  919. * There are three interesting races here.
  920. * The first is that in this particular sleep/wakeup usage, a single
  921. * wakeup can rouse a process from two consecutive sleeps!
  922. * The ordering is:
  923. *
  924. * (a) set Sema.waiting = 1
  925. * (a) call sleep
  926. * (b) set Sema.waiting = 0
  927. * (a) check Sema.waiting inside sleep, return w/o sleeping
  928. * (a) try for semaphore, fail
  929. * (a) set Sema.waiting = 1
  930. * (a) call sleep
  931. * (b) call wakeup(a)
  932. * (a) wake up again
  933. *
  934. * This is okay - semacquire will just go around the loop
  935. * again. It does mean that at the top of the for(;;) loop in
  936. * semacquire, phore.waiting might already be set to 1.
  937. *
  938. * The second is that a releaser might wake an acquirer who is
  939. * interrupted before he can acquire the lock. Since
  940. * release(n) issues only n wakeup calls -- only n can be used
  941. * anyway -- if the interrupted process is not going to use his
  942. * wakeup call he must pass it on to another acquirer.
  943. *
  944. * The third race is similar to the second but more subtle. An
  945. * acquirer sets waiting=1 and then does a final canacquire()
  946. * before going to sleep. The opposite order would result in
  947. * missing wakeups that happen between canacquire and
  948. * waiting=1. (In fact, the whole point of Sema.waiting is to
  949. * avoid missing wakeups between canacquire() and sleep().) But
  950. * there can be spurious wakeups between a successful
  951. * canacquire() and the following semdequeue(). This wakeup is
  952. * not useful to the acquirer, since he has already acquired
  953. * the semaphore. Like in the previous case, though, the
  954. * acquirer must pass the wakeup call along.
  955. *
  956. * This is all rather subtle. The code below has been verified
  957. * with the spin model /sys/src/9/port/semaphore.p. The
  958. * original code anticipated the second race but not the first
  959. * or third, which were caught only with spin. The first race
  960. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  961. * It was lucky that my abstract model of sleep/wakeup still managed
  962. * to preserve that behavior.
  963. *
  964. * I remain slightly concerned about memory coherence
  965. * outside of locks. The spin model does not take
  966. * queued processor writes into account so we have to
  967. * think hard. The only variables accessed outside locks
  968. * are the semaphore value itself and the boolean flag
  969. * Sema.waiting. The value is only accessed with CAS,
  970. * whose job description includes doing the right thing as
  971. * far as memory coherence across processors. That leaves
  972. * Sema.waiting. To handle it, we call coherence() before each
  973. * read and after each write. - rsc
  974. */
  975. /* Add semaphore p with addr a to list in seg. */
  976. static void
  977. semqueue(Segment *s, int *addr, Sema *p)
  978. {
  979. memset(p, 0, sizeof *p);
  980. p->addr = addr;
  981. lock(&s->sema.rend.l); /* uses s->sema.Rendez.Lock, but no one else is */
  982. p->next = &s->sema;
  983. p->prev = s->sema.prev;
  984. p->next->prev = p;
  985. p->prev->next = p;
  986. unlock(&s->sema.rend.l);
  987. }
  988. /* Remove semaphore p from list in seg. */
  989. static void
  990. semdequeue(Segment *s, Sema *p)
  991. {
  992. lock(&s->sema.rend.l);
  993. p->next->prev = p->prev;
  994. p->prev->next = p->next;
  995. unlock(&s->sema.rend.l);
  996. }
  997. /* Wake up n waiters with addr on list in seg. */
  998. static void
  999. semwakeup(Segment *s, int *addr, int n)
  1000. {
  1001. Sema *p;
  1002. lock(&s->sema.rend.l);
  1003. for(p = s->sema.next; p != &s->sema && n > 0; p = p->next){
  1004. if(p->addr == addr && p->waiting){
  1005. p->waiting = 0;
  1006. coherence();
  1007. wakeup(&p->rend);
  1008. n--;
  1009. }
  1010. }
  1011. unlock(&s->sema.rend.l);
  1012. }
  1013. /* Add delta to semaphore and wake up waiters as appropriate. */
  1014. static int
  1015. semrelease(Segment *s, int *addr, int delta)
  1016. {
  1017. int value;
  1018. do
  1019. value = *addr;
  1020. while(!CASW(addr, value, value + delta));
  1021. semwakeup(s, addr, delta);
  1022. return value + delta;
  1023. }
  /*
   * Try to take one unit from the semaphore at addr without
   * blocking.  Returns 1 if the value was positive and was
   * decremented by compare-and-swap, 0 if the value was seen
   * to be zero or negative.
   */
  static int
  canacquire(int *addr)
  {
      int seen;

      for(;;){
          seen = *addr;
          if(seen <= 0)
              return 0;
          if(CASW(addr, seen, seen - 1))
              return 1;
          /* value changed underneath us; look again */
      }
  }
  1035. /* Should we wake up? */
  1036. static int
  1037. semawoke(void *p)
  1038. {
  1039. coherence();
  1040. return !((Sema *)p)->waiting;
  1041. }
  1042. /* Acquire semaphore (subtract 1). */
  1043. static int
  1044. semacquire(Segment *s, int *addr, int block)
  1045. {
  1046. Proc *up = externup();
  1047. int acquired;
  1048. Sema phore;
  1049. if(canacquire(addr))
  1050. return 1;
  1051. if(!block)
  1052. return 0;
  1053. acquired = 0;
  1054. semqueue(s, addr, &phore);
  1055. for(;;){
  1056. phore.waiting = 1;
  1057. coherence();
  1058. if(canacquire(addr)){
  1059. acquired = 1;
  1060. break;
  1061. }
  1062. if(waserror())
  1063. break;
  1064. sleep(&phore.rend, semawoke, &phore);
  1065. poperror();
  1066. }
  1067. semdequeue(s, &phore);
  1068. coherence(); /* not strictly necessary due to lock in semdequeue */
  1069. if(!phore.waiting)
  1070. semwakeup(s, addr, 1);
  1071. if(!acquired)
  1072. nexterror();
  1073. return 1;
  1074. }
  1075. /* Acquire semaphore or time-out */
  1076. static int
  1077. tsemacquire(Segment *s, int *addr, i64 ms)
  1078. {
  1079. Proc *up = externup();
  1080. int acquired;
  1081. u64 t;
  1082. Sema phore;
  1083. if(canacquire(addr))
  1084. return 1;
  1085. if(ms == 0)
  1086. return 0;
  1087. acquired = 0;
  1088. semqueue(s, addr, &phore);
  1089. for(;;){
  1090. phore.waiting = 1;
  1091. coherence();
  1092. if(canacquire(addr)){
  1093. acquired = 1;
  1094. break;
  1095. }
  1096. if(waserror())
  1097. break;
  1098. t = sys->ticks;
  1099. tsleep(&phore.rend, semawoke, &phore, ms);
  1100. ms -= TK2MS(sys->ticks - t);
  1101. poperror();
  1102. if(ms <= 0)
  1103. break;
  1104. }
  1105. semdequeue(s, &phore);
  1106. coherence(); /* not strictly necessary due to lock in semdequeue */
  1107. if(!phore.waiting)
  1108. semwakeup(s, addr, 1);
  1109. if(ms <= 0)
  1110. return 0;
  1111. if(!acquired)
  1112. nexterror();
  1113. return 1;
  1114. }
  1115. void
  1116. syssemacquire(Ar0 *ar0, ...)
  1117. {
  1118. Proc *up = externup();
  1119. Segment *s;
  1120. int *addr, block;
  1121. va_list list;
  1122. va_start(list, ar0);
  1123. /*
  1124. * int semacquire(long* addr, int block);
  1125. * should be (and will be implemented below as) perhaps
  1126. * int semacquire(int* addr, int block);
  1127. */
  1128. addr = va_arg(list, int *);
  1129. addr = validaddr(addr, sizeof(int), 1);
  1130. evenaddr(PTR2UINT(addr));
  1131. block = va_arg(list, int);
  1132. va_end(list);
  1133. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1134. error(Ebadarg);
  1135. if(*addr < 0)
  1136. error(Ebadarg);
  1137. ar0->i = semacquire(s, addr, block);
  1138. }
  1139. void
  1140. systsemacquire(Ar0 *ar0, ...)
  1141. {
  1142. Proc *up = externup();
  1143. Segment *s;
  1144. int *addr;
  1145. u64 ms;
  1146. va_list list;
  1147. va_start(list, ar0);
  1148. /*
  1149. * int tsemacquire(long* addr, u64 ms);
  1150. * should be (and will be implemented below as) perhaps
  1151. * int tsemacquire(int* addr, u64 ms);
  1152. */
  1153. addr = va_arg(list, int *);
  1154. addr = validaddr(addr, sizeof(int), 1);
  1155. evenaddr(PTR2UINT(addr));
  1156. ms = va_arg(list, u64);
  1157. va_end(list);
  1158. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1159. error(Ebadarg);
  1160. if(*addr < 0)
  1161. error(Ebadarg);
  1162. ar0->i = tsemacquire(s, addr, ms);
  1163. }
  1164. void
  1165. syssemrelease(Ar0 *ar0, ...)
  1166. {
  1167. Proc *up = externup();
  1168. Segment *s;
  1169. int *addr, delta;
  1170. va_list list;
  1171. va_start(list, ar0);
  1172. /*
  1173. * long semrelease(long* addr, long count);
  1174. * should be (and will be implemented below as) perhaps
  1175. * int semrelease(int* addr, int count);
  1176. */
  1177. addr = va_arg(list, int *);
  1178. addr = validaddr(addr, sizeof(int), 1);
  1179. evenaddr(PTR2UINT(addr));
  1180. delta = va_arg(list, int);
  1181. va_end(list);
  1182. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1183. error(Ebadarg);
  1184. if(delta < 0 || *addr < 0)
  1185. error(Ebadarg);
  1186. ar0->i = semrelease(s, addr, delta);
  1187. }